Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief methods for arnoldi iteration
10 : !> \par History
11 : !> 2014.09 created [Florian Schiffmann]
12 : !> 2023.12 Removed support for single-precision [Ole Schuett]
13 : !> 2024.12 Removed support for complex input matrices [Ole Schuett]
14 : !> \author Florian Schiffmann
15 : ! **************************************************************************************************
16 : MODULE arnoldi_methods
17 : USE arnoldi_geev, ONLY: arnoldi_general_local_diag,&
18 : arnoldi_symm_local_diag,&
19 : arnoldi_tridiag_local_diag
20 : USE arnoldi_types, ONLY: arnoldi_control_type,&
21 : arnoldi_data_type,&
22 : arnoldi_env_type,&
23 : get_control,&
24 : get_data,&
25 : m_x_v_vectors_type
26 : USE arnoldi_vector, ONLY: dbcsr_matrix_colvec_multiply
27 : USE cp_dbcsr_api, ONLY: &
28 : dbcsr_add, dbcsr_copy, dbcsr_get_data_p, dbcsr_get_info, dbcsr_iterator_blocks_left, &
29 : dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
30 : dbcsr_p_type, dbcsr_scale, dbcsr_type
31 : USE kinds, ONLY: dp
32 : USE message_passing, ONLY: mp_comm_type
33 : #include "../base/base_uses.f90"
34 :
35 : IMPLICIT NONE
36 :
37 : PRIVATE
38 :
39 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'arnoldi_methods'
40 :
41 : PUBLIC :: arnoldi_init, build_subspace, compute_evals, arnoldi_iram, &
42 : gev_arnoldi_init, gev_build_subspace, gev_update_data
43 :
44 : CONTAINS
45 :
46 : ! **************************************************************************************************
47 : !> \brief Alogorithm for the implicit restarts in the arnoldi method
48 : !> this is an early implementation which scales subspace size^4
49 : !> by replacing the lapack calls with direct math the
50 : !> QR and gemms can be made linear and a N^2 sacling will be acchieved
51 : !> however this already sets the framework but should be used with care.
52 : !> Currently all based on lapack.
53 : !> \param arnoldi_env ...
54 : ! **************************************************************************************************
55 250 : SUBROUTINE arnoldi_iram(arnoldi_env)
56 : TYPE(arnoldi_env_type) :: arnoldi_env
57 :
58 : CHARACTER(LEN=*), PARAMETER :: routineN = 'arnoldi_iram'
59 :
60 250 : COMPLEX(dp), ALLOCATABLE, DIMENSION(:) :: tau, work, work_measure
61 250 : COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :) :: Q, safe_mat, tmp_mat, tmp_mat1
62 : INTEGER :: handle, i, info, j, lwork, msize, nwant
63 : REAL(kind=dp) :: beta, sigma
64 250 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: Qdata
65 : TYPE(arnoldi_control_type), POINTER :: control
66 : TYPE(arnoldi_data_type), POINTER :: ar_data
67 :
68 250 : CALL timeset(routineN, handle)
69 :
70 : ! This is just a terribly inefficient implementation but I hope it is correct and might serve as a reference
71 250 : ar_data => get_data(arnoldi_env)
72 250 : control => get_control(arnoldi_env)
73 250 : msize = control%current_step
74 250 : nwant = control%nval_out
75 1500 : ALLOCATE (tmp_mat(msize, msize)); ALLOCATE (safe_mat(msize, msize))
76 1250 : ALLOCATE (Q(msize, msize)); ALLOCATE (tmp_mat1(msize, msize))
77 250 : ALLOCATE (work_measure(1))
78 1500 : ALLOCATE (tau(msize)); ALLOCATE (Qdata(msize, msize))
79 : !make Q identity
80 225914 : Q = CMPLX(0.0, 0.0, dp)
81 6778 : DO i = 1, msize
82 6778 : Q(i, i) = CMPLX(1.0, 0.0, dp)
83 : END DO
84 :
85 : ! Looks a bit odd, but safe_mat will contain the result in the end, while tmpmat gets violated by lapack
86 225914 : tmp_mat(:, :) = CMPLX(ar_data%Hessenberg(1:msize, 1:msize), 0.0, KIND=dp)
87 225914 : safe_mat(:, :) = tmp_mat(:, :)
88 :
89 6778 : DO i = 1, msize
90 : ! A bit a strange check but in the end we only want to shift the unwanted evals
91 212858 : IF (ANY(control%selected_ind == i)) CYCLE
92 : ! Here we shift the matrix by subtracting unwanted evals from the diagonal
93 212108 : DO j = 1, msize
94 212108 : tmp_mat(j, j) = tmp_mat(j, j) - ar_data%evals(i)
95 : END DO
96 : ! Now we repair the damage by QR factorizing
97 6028 : lwork = -1
98 6028 : CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work_measure, lwork, info)
99 6028 : lwork = INT(work_measure(1))
100 6028 : IF (ALLOCATED(work)) THEN
101 5778 : IF (SIZE(work) .LT. lwork) THEN
102 0 : DEALLOCATE (work)
103 : END IF
104 : END IF
105 6528 : IF (.NOT. ALLOCATED(work)) ALLOCATE (work(lwork))
106 6028 : CALL zgeqrf(msize, msize, tmp_mat, msize, tau, work, lwork, info)
107 : ! Ask Lapack to reconstruct Q from its own way of storing data (tmpmat will contain Q)
108 6028 : CALL zungqr(msize, msize, msize, tmp_mat, msize, tau, work, lwork, info)
109 : ! update Q=Q*Q_current
110 9112716 : tmp_mat1(:, :) = Q(:, :)
111 : CALL zgemm('N', 'N', msize, msize, msize, CMPLX(1.0, 0.0, dp), tmp_mat1, &
112 6028 : msize, tmp_mat, msize, CMPLX(0.0, 0.0, dp), Q, msize)
113 : ! Update H=(Q*)HQ
114 : CALL zgemm('C', 'N', msize, msize, msize, CMPLX(1.0, 0.0, dp), tmp_mat, &
115 6028 : msize, safe_mat, msize, CMPLX(0.0, 0.0, dp), tmp_mat1, msize)
116 : CALL zgemm('N', 'N', msize, msize, msize, CMPLX(1.0, 0.0, dp), tmp_mat1, &
117 6028 : msize, tmp_mat, msize, CMPLX(0.0, 0.0, dp), safe_mat, msize)
118 :
119 : ! this one is crucial for numerics not to accumulate noise in the subdiagonals
120 212108 : DO j = 1, msize
121 4359320 : safe_mat(j + 2:msize, j) = CMPLX(0.0, 0.0, dp)
122 : END DO
123 9113466 : tmp_mat(:, :) = safe_mat(:, :)
124 : END DO
125 :
126 : ! Now we can compute our restart quantities
127 232442 : ar_data%Hessenberg = 0.0_dp
128 225914 : ar_data%Hessenberg(1:msize, 1:msize) = REAL(safe_mat, KIND=dp)
129 225914 : Qdata(:, :) = REAL(Q(:, :), KIND=dp)
130 :
131 250 : beta = ar_data%Hessenberg(nwant + 1, nwant); sigma = Qdata(msize, nwant)
132 :
133 : !update the residuum and the basis vectors
134 250 : IF (control%local_comp) THEN
135 443564 : ar_data%f_vec = MATMUL(ar_data%local_history(:, 1:msize), Qdata(1:msize, nwant + 1))*beta + ar_data%f_vec(:)*sigma
136 1267924 : ar_data%local_history(:, 1:nwant) = MATMUL(ar_data%local_history(:, 1:msize), Qdata(1:msize, 1:nwant))
137 : END IF
138 : ! Set the current step to nwant so the subspace build knows where to start
139 250 : control%current_step = nwant
140 :
141 250 : DEALLOCATE (tmp_mat, safe_mat, Q, Qdata, tmp_mat1, work, tau, work_measure)
142 250 : CALL timestop(handle)
143 :
144 250 : END SUBROUTINE arnoldi_iram
145 :
146 : ! **************************************************************************************************
147 : !> \brief Call the correct eigensolver, in the arnoldi method only the right
148 : !> eigenvectors are used. Lefts are created here but dumped immediately
149 : !> This is only the serial version
150 : !> \param arnoldi_env ...
151 : ! **************************************************************************************************
152 129788 : SUBROUTINE compute_evals(arnoldi_env)
153 : TYPE(arnoldi_env_type) :: arnoldi_env
154 :
155 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_evals'
156 :
157 : COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :) :: levec
158 : INTEGER :: handle, ndim
159 : TYPE(arnoldi_control_type), POINTER :: control
160 : TYPE(arnoldi_data_type), POINTER :: ar_data
161 :
162 129788 : CALL timeset(routineN, handle)
163 :
164 129788 : ar_data => get_data(arnoldi_env)
165 129788 : control => get_control(arnoldi_env)
166 129788 : ndim = control%current_step
167 519152 : ALLOCATE (levec(ndim, ndim))
168 :
169 : ! Needs antoher interface as the calls to real and complex geev differ (sucks!)
170 : ! only perform the diagonalization on processors which hold data
171 129788 : IF (control%generalized_ev) THEN
172 : CALL arnoldi_symm_local_diag('V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
173 4463 : ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim))
174 : ELSE
175 125325 : IF (control%symmetric) THEN
176 : CALL arnoldi_tridiag_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
177 8652 : ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
178 : ELSE
179 : CALL arnoldi_general_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
180 116673 : ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
181 : END IF
182 : END IF
183 :
184 129788 : DEALLOCATE (levec)
185 129788 : CALL timestop(handle)
186 :
187 129788 : END SUBROUTINE compute_evals
188 :
189 : ! **************************************************************************************************
190 : !> \brief Interface for the initialization of the arnoldi subspace creation
191 : !> currently it can only setup a random vector but can be improved to
192 : !> various types of restarts easily
193 : !> \param matrix pointer to the matrices as described in main interface
194 : !> \param vectors work vectors for the matrix vector multiplications
195 : !> \param arnoldi_env all data concerning the subspace
196 : ! **************************************************************************************************
197 125075 : SUBROUTINE arnoldi_init(matrix, vectors, arnoldi_env)
198 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
199 : TYPE(m_x_v_vectors_type) :: vectors
200 : TYPE(arnoldi_env_type) :: arnoldi_env
201 :
202 : CHARACTER(LEN=*), PARAMETER :: routineN = 'arnoldi_init'
203 :
204 : INTEGER :: col, col_size, handle, i, iseed(4), &
205 : ncol_local, nrow_local, row, row_size
206 : LOGICAL :: local_comp
207 : REAL(dp) :: rnorm
208 : REAL(kind=dp) :: norm
209 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: v_vec, w_vec
210 125075 : REAL(kind=dp), DIMENSION(:), POINTER :: data_vec
211 : TYPE(arnoldi_control_type), POINTER :: control
212 : TYPE(arnoldi_data_type), POINTER :: ar_data
213 : TYPE(dbcsr_iterator_type) :: iter
214 : TYPE(mp_comm_type) :: pcol_group
215 :
216 125075 : CALL timeset(routineN, handle)
217 :
218 125075 : control => get_control(arnoldi_env)
219 125075 : pcol_group = control%pcol_group
220 125075 : local_comp = control%local_comp
221 :
222 125075 : ar_data => get_data(arnoldi_env)
223 :
224 : ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
225 125075 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
226 331575 : ALLOCATE (v_vec(nrow_local))
227 206500 : ALLOCATE (w_vec(nrow_local))
228 1543657 : v_vec = 0.0_dp; w_vec = 0.0_dp
229 135060307 : ar_data%Hessenberg = 0.0_dp
230 :
231 125075 : IF (control%has_initial_vector) THEN
232 : ! after calling the set routine the initial vector is stored in f_vec
233 2735 : CALL transfer_local_array_to_dbcsr(vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
234 : ELSE
235 : ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
236 122340 : CALL dbcsr_iterator_start(iter, vectors%input_vec)
237 227061 : DO WHILE (dbcsr_iterator_blocks_left(iter))
238 104721 : CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
239 104721 : iseed(1) = 2; iseed(2) = MOD(row, 4095); iseed(3) = MOD(col, 4095); iseed(4) = 11
240 227061 : CALL dlarnv(2, iseed, row_size*col_size, data_vec)
241 : END DO
242 122340 : CALL dbcsr_iterator_stop(iter)
243 : END IF
244 :
245 125075 : CALL transfer_dbcsr_to_local_array(vectors%input_vec, v_vec, nrow_local, control%local_comp)
246 :
247 : ! compute the vector norm of the random vectorm, get it real valued as well (rnorm)
248 125075 : CALL compute_norms(v_vec, norm, rnorm, control%pcol_group)
249 :
250 125075 : IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
251 125075 : CALL dbcsr_scale(vectors%input_vec, REAL(1.0, dp)/rnorm)
252 :
253 : ! Everything prepared, initialize the Arnoldi iteration
254 125075 : CALL transfer_dbcsr_to_local_array(vectors%input_vec, v_vec, nrow_local, control%local_comp)
255 :
256 : ! This permits to compute the subspace of a matrix which is a product of multiple matrices
257 250150 : DO i = 1, SIZE(matrix)
258 : CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
259 125075 : 0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
260 250150 : CALL dbcsr_copy(vectors%input_vec, vectors%result_vec)
261 : END DO
262 :
263 125075 : CALL transfer_dbcsr_to_local_array(vectors%result_vec, w_vec, nrow_local, control%local_comp)
264 :
265 : ! Put the projection into the Hessenberg matrix, and make the vectors orthonormal
266 834366 : ar_data%Hessenberg(1, 1) = DOT_PRODUCT(v_vec, w_vec)
267 125075 : CALL pcol_group%sum(ar_data%Hessenberg(1, 1))
268 834366 : ar_data%f_vec = w_vec - v_vec*ar_data%Hessenberg(1, 1)
269 :
270 834366 : ar_data%local_history(:, 1) = v_vec(:)
271 :
272 : ! We did the first step in here so we should set the current step for the subspace generation accordingly
273 125075 : control%current_step = 1
274 :
275 125075 : DEALLOCATE (v_vec, w_vec)
276 125075 : CALL timestop(handle)
277 :
278 250150 : END SUBROUTINE arnoldi_init
279 :
280 : ! **************************************************************************************************
281 : !> \brief Computes the initial guess for the solution of the generalized eigenvalue
282 : !> using the arnoldi method
283 : !> \param matrix pointer to the matrices as described in main interface
284 : !> \param matrix_arnoldi ...
285 : !> \param vectors work vectors for the matrix vector multiplications
286 : !> \param arnoldi_env all data concerning the subspace
287 : ! **************************************************************************************************
288 3636 : SUBROUTINE gev_arnoldi_init(matrix, matrix_arnoldi, vectors, arnoldi_env)
289 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix, matrix_arnoldi
290 : TYPE(m_x_v_vectors_type) :: vectors
291 : TYPE(arnoldi_env_type) :: arnoldi_env
292 :
293 : CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_arnoldi_init'
294 :
295 : INTEGER :: col, col_size, handle, iseed(4), &
296 : ncol_local, nrow_local, row, row_size
297 : LOGICAL :: local_comp
298 : REAL(dp) :: rnorm
299 : REAL(kind=dp) :: denom, norm
300 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: v_vec, w_vec
301 3636 : REAL(kind=dp), DIMENSION(:), POINTER :: data_vec
302 : TYPE(arnoldi_control_type), POINTER :: control
303 : TYPE(arnoldi_data_type), POINTER :: ar_data
304 : TYPE(dbcsr_iterator_type) :: iter
305 : TYPE(mp_comm_type) :: pcol_group
306 :
307 3636 : CALL timeset(routineN, handle)
308 :
309 3636 : control => get_control(arnoldi_env)
310 3636 : pcol_group = control%pcol_group
311 3636 : local_comp = control%local_comp
312 :
313 3636 : ar_data => get_data(arnoldi_env)
314 :
315 : ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
316 3636 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
317 10880 : ALLOCATE (v_vec(nrow_local))
318 7244 : ALLOCATE (w_vec(nrow_local))
319 125550 : v_vec = 0.0_dp; w_vec = 0.0_dp
320 1603476 : ar_data%Hessenberg = 0.0_dp
321 :
322 3636 : IF (control%has_initial_vector) THEN
323 : ! after calling the set routine the initial vector is stored in f_vec
324 1971 : CALL transfer_local_array_to_dbcsr(vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
325 : ELSE
326 : ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
327 1665 : CALL dbcsr_iterator_start(iter, vectors%input_vec)
328 5040 : DO WHILE (dbcsr_iterator_blocks_left(iter))
329 3375 : CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
330 3375 : iseed(1) = 2; iseed(2) = MOD(row, 4095); iseed(3) = MOD(col, 4095); iseed(4) = 11
331 5040 : CALL dlarnv(2, iseed, row_size*col_size, data_vec)
332 : END DO
333 1665 : CALL dbcsr_iterator_stop(iter)
334 : END IF
335 :
336 3636 : CALL transfer_dbcsr_to_local_array(vectors%input_vec, v_vec, nrow_local, control%local_comp)
337 :
338 : ! compute the vector norm of the reandom vectorm, get it real valued as well (rnorm)
339 3636 : CALL compute_norms(v_vec, norm, rnorm, control%pcol_group)
340 :
341 3636 : IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
342 3636 : CALL dbcsr_scale(vectors%input_vec, REAL(1.0, dp)/rnorm)
343 :
344 : CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
345 3636 : 0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
346 :
347 3636 : CALL transfer_dbcsr_to_local_array(vectors%result_vec, w_vec, nrow_local, control%local_comp)
348 :
349 : ar_data%rho_scale = 0.0_dp
350 64593 : ar_data%rho_scale = DOT_PRODUCT(v_vec, w_vec)
351 3636 : CALL pcol_group%sum(ar_data%rho_scale)
352 :
353 : CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
354 3636 : 0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
355 :
356 3636 : CALL transfer_dbcsr_to_local_array(vectors%result_vec, w_vec, nrow_local, control%local_comp)
357 :
358 : denom = 0.0_dp
359 64593 : denom = DOT_PRODUCT(v_vec, w_vec)
360 3636 : CALL pcol_group%sum(denom)
361 3636 : IF (control%myproc == 0) ar_data%rho_scale = ar_data%rho_scale/denom
362 3636 : CALL control%mp_group%bcast(ar_data%rho_scale, 0)
363 :
364 : ! if the maximum ev is requested we need to optimize with -A-rho*B
365 3636 : CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
366 3636 : CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, 1.0_dp, -ar_data%rho_scale)
367 :
368 64593 : ar_data%x_vec = v_vec
369 :
370 3636 : CALL timestop(handle)
371 :
372 10908 : END SUBROUTINE gev_arnoldi_init
373 :
374 : ! **************************************************************************************************
375 : !> \brief Here we create the Krylov subspace and fill the Hessenberg matrix
376 : !> convergence check is only performed on subspace convergence
377 : !> Gram Schidt is used to orthonogonalize.
378 : !> If this is numericall not sufficient a Daniel, Gragg, Kaufman and Steward
379 : !> correction is performed
380 : !> \param matrix ...
381 : !> \param vectors ...
382 : !> \param arnoldi_env ...
383 : ! **************************************************************************************************
384 125325 : SUBROUTINE build_subspace(matrix, vectors, arnoldi_env)
385 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
386 : TYPE(m_x_v_vectors_type), TARGET :: vectors
387 : TYPE(arnoldi_env_type) :: arnoldi_env
388 :
389 : CHARACTER(LEN=*), PARAMETER :: routineN = 'build_subspace'
390 :
391 : INTEGER :: handle, i, j, ncol_local, nrow_local
392 : REAL(dp) :: rnorm
393 : REAL(kind=dp) :: norm
394 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: h_vec, s_vec, v_vec, w_vec
395 : TYPE(arnoldi_control_type), POINTER :: control
396 : TYPE(arnoldi_data_type), POINTER :: ar_data
397 : TYPE(dbcsr_type), POINTER :: input_vec, result_vec, swap_vec
398 :
399 125325 : CALL timeset(routineN, handle)
400 :
401 125325 : ar_data => get_data(arnoldi_env)
402 125325 : control => get_control(arnoldi_env)
403 125325 : control%converged = .FALSE.
404 :
405 : ! create the vectors required during the iterations
406 125325 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
407 495675 : ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
408 1700366 : v_vec = 0.0_dp; w_vec = 0.0_dp
409 626625 : ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
410 :
411 594334 : DO j = control%current_step, control%max_iter - 1
412 :
413 : ! compute the vector norm of the residuum, get it real valued as well (rnorm)
414 591316 : CALL compute_norms(ar_data%f_vec, norm, rnorm, control%pcol_group)
415 :
416 : ! check convergence and inform everybody about it, a bit annoying to talk to everybody because of that
417 591316 : IF (control%myproc == 0) control%converged = rnorm .LT. REAL(control%threshold, dp)
418 591316 : CALL control%mp_group%bcast(control%converged, 0)
419 591316 : IF (control%converged) EXIT
420 :
421 : ! transfer normalized residdum to history and its norm to the Hessenberg matrix
422 469009 : IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
423 12784755 : v_vec(:) = ar_data%f_vec(:)/rnorm; ar_data%local_history(:, j + 1) = v_vec(:); ar_data%Hessenberg(j + 1, j) = norm
424 :
425 469009 : input_vec => vectors%input_vec
426 469009 : result_vec => vectors%result_vec
427 469009 : CALL transfer_local_array_to_dbcsr(input_vec, v_vec, nrow_local, control%local_comp)
428 :
429 : ! This permits to compute the subspace of a matrix which is a product of two matrices
430 938018 : DO i = 1, SIZE(matrix)
431 : CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, input_vec, result_vec, 1.0_dp, &
432 469009 : 0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
433 469009 : swap_vec => input_vec
434 469009 : input_vec => result_vec
435 938018 : result_vec => swap_vec
436 : END DO
437 :
438 469009 : CALL transfer_dbcsr_to_local_array(input_vec, w_vec, nrow_local, control%local_comp)
439 :
440 : ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
441 : CALL Gram_Schmidt_ortho(h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j + 1, &
442 469009 : ar_data%local_history, ar_data%local_history, control%local_comp, control%pcol_group)
443 :
444 : ! A bit more expensive but simply always top up with a DGKS correction, otherwise numerics
445 : ! can become a problem later on, there is probably a good check whether it's necessary, but we don't perform it
446 : CALL DGKS_ortho(h_vec, ar_data%f_vec, s_vec, nrow_local, j + 1, ar_data%local_history, &
447 469009 : ar_data%local_history, control%local_comp, control%pcol_group)
448 : ! Finally we can put the projections into our Hessenberg matrix
449 4459156 : ar_data%Hessenberg(1:j + 1, j + 1) = h_vec(1:j + 1)
450 594334 : control%current_step = j + 1
451 : END DO
452 :
453 : ! compute the vector norm of the final residuum and put it in to Hessenberg
454 125325 : CALL compute_norms(ar_data%f_vec, norm, rnorm, control%pcol_group)
455 125325 : ar_data%Hessenberg(control%current_step + 1, control%current_step) = norm
456 :
457 : ! broadcast the Hessenberg matrix so we don't need to care later on
458 270460173 : CALL control%mp_group%bcast(ar_data%Hessenberg, 0)
459 :
460 125325 : DEALLOCATE (v_vec, w_vec, h_vec, s_vec)
461 125325 : CALL timestop(handle)
462 :
463 250650 : END SUBROUTINE build_subspace
464 :
465 : ! **************************************************************************************************
466 : !> \brief builds the basis rothogonal wrt. the metric.
467 : !> The structure looks similar to normal arnoldi but norms, vectors and
468 : !> matrix_vector products are very differently defined. Therefore it is
469 : !> cleaner to put it in a separate subroutine to avoid confusion
470 : !> \param matrix ...
471 : !> \param vectors ...
472 : !> \param arnoldi_env ...
473 : ! **************************************************************************************************
474 4463 : SUBROUTINE gev_build_subspace(matrix, vectors, arnoldi_env)
475 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
476 : TYPE(m_x_v_vectors_type) :: vectors
477 : TYPE(arnoldi_env_type) :: arnoldi_env
478 :
479 : CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_build_subspace'
480 :
481 : INTEGER :: handle, j, ncol_local, nrow_local
482 : REAL(kind=dp) :: norm
483 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: h_vec, s_vec, v_vec, w_vec
484 4463 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: BZmat, CZmat, Zmat
485 : TYPE(arnoldi_control_type), POINTER :: control
486 : TYPE(arnoldi_data_type), POINTER :: ar_data
487 : TYPE(mp_comm_type) :: pcol_group
488 :
489 4463 : CALL timeset(routineN, handle)
490 :
491 4463 : ar_data => get_data(arnoldi_env)
492 4463 : control => get_control(arnoldi_env)
493 4463 : control%converged = .FALSE.
494 4463 : pcol_group = control%pcol_group
495 :
496 : ! create the vectors required during the iterations
497 4463 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
498 17794 : ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
499 201761 : v_vec = 0.0_dp; w_vec = 0.0_dp
500 17852 : ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
501 26720 : ALLOCATE (Zmat(nrow_local, control%max_iter)); ALLOCATE (CZmat(nrow_local, control%max_iter))
502 13360 : ALLOCATE (BZmat(nrow_local, control%max_iter))
503 :
504 4463 : CALL transfer_local_array_to_dbcsr(vectors%input_vec, ar_data%x_vec, nrow_local, control%local_comp)
505 : CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
506 4463 : 0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
507 4463 : CALL transfer_dbcsr_to_local_array(vectors%result_vec, BZmat(:, 1), nrow_local, control%local_comp)
508 :
509 : norm = 0.0_dp
510 103112 : norm = DOT_PRODUCT(ar_data%x_vec, BZmat(:, 1))
511 4463 : CALL pcol_group%sum(norm)
512 4463 : IF (control%local_comp) THEN
513 201732 : Zmat(:, 1) = ar_data%x_vec/SQRT(norm); BZmat(:, 1) = BZmat(:, 1)/SQRT(norm)
514 : END IF
515 :
516 73921 : DO j = 1, control%max_iter
517 73921 : control%current_step = j
518 73921 : CALL transfer_local_array_to_dbcsr(vectors%input_vec, Zmat(:, j), nrow_local, control%local_comp)
519 : CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
520 73921 : 0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
521 73921 : CALL transfer_dbcsr_to_local_array(vectors%result_vec, CZmat(:, j), nrow_local, control%local_comp)
522 1889032 : w_vec(:) = CZmat(:, j)
523 :
524 : ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
525 : CALL Gram_Schmidt_ortho(h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j, &
526 73921 : BZmat, Zmat, control%local_comp, control%pcol_group)
527 :
528 : ! A bit more expensive but simpliy always top up with a DGKS correction, otherwise numerics
529 : ! can becom a problem later on, there is probably a good check, but we don't perform it
530 : CALL DGKS_ortho(h_vec, ar_data%f_vec, s_vec, nrow_local, j, BZmat, &
531 73921 : Zmat, control%local_comp, control%pcol_group)
532 :
533 73921 : CALL transfer_local_array_to_dbcsr(vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
534 : CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
535 73921 : 0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
536 73921 : CALL transfer_dbcsr_to_local_array(vectors%result_vec, v_vec, nrow_local, control%local_comp)
537 : norm = 0.0_dp
538 1889032 : norm = DOT_PRODUCT(ar_data%f_vec, v_vec)
539 73921 : CALL pcol_group%sum(norm)
540 :
541 73921 : IF (control%myproc == 0) control%converged = REAL(norm, dp) .LT. EPSILON(REAL(1.0, dp))
542 73921 : CALL control%mp_group%bcast(control%converged, 0)
543 73921 : IF (control%converged) EXIT
544 71851 : IF (j == control%max_iter - 1) EXIT
545 :
546 73921 : IF (control%local_comp) THEN
547 3502046 : Zmat(:, j + 1) = ar_data%f_vec/SQRT(norm); BZmat(:, j + 1) = v_vec(:)/SQRT(norm)
548 : END IF
549 : END DO
550 :
551 : ! getting a bit more complicated here as the final matrix is again a product which has to be computed with the
552 : ! distributed vectors, therefore a sum along the first proc_col is necessary. As we want that matrix everywhere,
553 : ! we set it to zero before and compute the distributed product only on the first col and then sum over the full grid
554 1968183 : ar_data%Hessenberg = 0.0_dp
555 4463 : IF (control%local_comp) THEN
556 : ar_data%Hessenberg(1:control%current_step, 1:control%current_step) = &
557 22262180 : MATMUL(TRANSPOSE(CZmat(:, 1:control%current_step)), Zmat(:, 1:control%current_step))
558 : END IF
559 3931903 : CALL control%mp_group%sum(ar_data%Hessenberg)
560 :
561 2066703 : ar_data%local_history = Zmat
562 : ! broadcast the Hessenberg matrix so we don't need to care later on
563 :
564 4463 : DEALLOCATE (v_vec); DEALLOCATE (w_vec); DEALLOCATE (s_vec); DEALLOCATE (h_vec); DEALLOCATE (CZmat);
565 4463 : DEALLOCATE (Zmat); DEALLOCATE (BZmat)
566 :
567 4463 : CALL timestop(handle)
568 :
569 8926 : END SUBROUTINE gev_build_subspace
570 :
571 : ! **************************************************************************************************
572 : !> \brief Updates all data after an inner loop of the generalized ev arnoldi.
573 : !> Updates rho and C=A-rho*B accordingly.
574 : !> As an update scheme is used for he ev, the output ev has to be replaced
575 : !> with the updated one.
576 : !> Furthermore a convergence check is performed. The mv product could
577 : !> be skiiped by making clever use of the precomputed data, However,
578 : !> it is most likely not worth the effort.
579 : !> \param matrix ...
580 : !> \param matrix_arnoldi ...
581 : !> \param vectors ...
582 : !> \param arnoldi_env ...
583 : ! **************************************************************************************************
584 4463 : SUBROUTINE gev_update_data(matrix, matrix_arnoldi, vectors, arnoldi_env)
585 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix, matrix_arnoldi
586 : TYPE(m_x_v_vectors_type) :: vectors
587 : TYPE(arnoldi_env_type) :: arnoldi_env
588 :
589 : CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_update_data'
590 :
591 : COMPLEX(dp) :: val
592 : INTEGER :: handle, i, ind, ncol_local, nrow_local
593 : REAL(dp) :: rnorm
594 : REAL(kind=dp) :: norm
595 4463 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: v_vec
596 : TYPE(arnoldi_control_type), POINTER :: control
597 : TYPE(arnoldi_data_type), POINTER :: ar_data
598 :
599 4463 : CALL timeset(routineN, handle)
600 :
601 4463 : control => get_control(arnoldi_env)
602 :
603 4463 : ar_data => get_data(arnoldi_env)
604 :
605 : ! compute the new shift, hack around the problem templating the conversion
606 4463 : val = ar_data%evals(control%selected_ind(1))
607 4463 : ar_data%rho_scale = ar_data%rho_scale + REAL(val, dp)
608 : ! compute the new eigenvector / initial guess for the next arnoldi loop
609 103112 : ar_data%x_vec = 0.0_dp
610 78384 : DO i = 1, control%current_step
611 73921 : val = ar_data%revec(i, control%selected_ind(1))
612 1893495 : ar_data%x_vec(:) = ar_data%x_vec(:) + ar_data%local_history(:, i)*REAL(val, dp)
613 : END DO
614 : ! ar_data%x_vec(:)=MATMUL(ar_data%local_history(:,1:control%current_step),&
615 : ! ar_data%revec(1:control%current_step,control%selected_ind(1)))
616 :
617 : ! update the C-matrix (A-rho*B), if the maximum value is requested we have to use -A-rho*B
618 4463 : CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
619 4463 : CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, 1.0_dp, -ar_data%rho_scale)
620 :
621 : ! compute convergence and set the correct eigenvalue and eigenvector
622 4463 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
623 4463 : IF (ncol_local > 0) THEN
624 13360 : ALLOCATE (v_vec(nrow_local))
625 4463 : CALL compute_norms(ar_data%x_vec, norm, rnorm, control%pcol_group)
626 103112 : v_vec(:) = ar_data%x_vec(:)/rnorm
627 : END IF
628 4463 : CALL transfer_local_array_to_dbcsr(vectors%input_vec, v_vec, nrow_local, control%local_comp)
629 : CALL dbcsr_matrix_colvec_multiply(matrix_arnoldi(1)%matrix, vectors%input_vec, vectors%result_vec, 1.0_dp, &
630 4463 : 0.0_dp, vectors%rep_row_vec, vectors%rep_col_vec)
631 4463 : CALL transfer_dbcsr_to_local_array(vectors%result_vec, v_vec, nrow_local, control%local_comp)
632 4463 : IF (ncol_local > 0) THEN
633 4463 : CALL compute_norms(v_vec, norm, rnorm, control%pcol_group)
634 : ! check convergence
635 4463 : control%converged = rnorm .LT. control%threshold
636 4463 : DEALLOCATE (v_vec)
637 : END IF
638 : ! and broadcast the real eigenvalue
639 4463 : CALL control%mp_group%bcast(control%converged, 0)
640 4463 : ind = control%selected_ind(1)
641 4463 : CALL control%mp_group%bcast(ar_data%rho_scale, 0)
642 :
643 : ! Again the maximum value request is done on -A therefore the eigenvalue needs the opposite sign
644 4463 : ar_data%evals(ind) = ar_data%rho_scale
645 :
646 4463 : CALL timestop(handle)
647 :
648 4463 : END SUBROUTINE gev_update_data
649 :
650 : ! **************************************************************************************************
651 : !> \brief Helper routine to transfer the all data of a dbcsr matrix to a local array
652 : !> \param vec ...
653 : !> \param array ...
654 : !> \param n ...
655 : !> \param is_local ...
656 : ! **************************************************************************************************
657 1011910 : SUBROUTINE transfer_dbcsr_to_local_array(vec, array, n, is_local)
658 : TYPE(dbcsr_type) :: vec
659 : REAL(kind=dp), DIMENSION(:) :: array
660 : INTEGER :: n
661 : LOGICAL :: is_local
662 :
663 1011910 : REAL(kind=dp), DIMENSION(:), POINTER :: data_vec
664 :
665 2023820 : data_vec => dbcsr_get_data_p(vec, select_data_type=0.0_dp)
666 13308047 : IF (is_local) array(1:n) = data_vec(1:n)
667 :
668 1011910 : END SUBROUTINE transfer_dbcsr_to_local_array
669 :
670 : ! **************************************************************************************************
671 : !> \brief The inverse routine transferring data back from an array to a dbcsr
672 : !> \param vec ...
673 : !> \param array ...
674 : !> \param n ...
675 : !> \param is_local ...
676 : ! **************************************************************************************************
677 630483 : SUBROUTINE transfer_local_array_to_dbcsr(vec, array, n, is_local)
678 : TYPE(dbcsr_type) :: vec
679 : REAL(kind=dp), DIMENSION(:) :: array
680 : INTEGER :: n
681 : LOGICAL :: is_local
682 :
683 630483 : REAL(kind=dp), DIMENSION(:), POINTER :: data_vec
684 :
685 1260966 : data_vec => dbcsr_get_data_p(vec, select_data_type=0.0_dp)
686 10692045 : IF (is_local) data_vec(1:n) = array(1:n)
687 :
688 630483 : END SUBROUTINE transfer_local_array_to_dbcsr
689 :
690 : ! **************************************************************************************************
691 : !> \brief Gram-Schmidt in matrix vector form
692 : !> \param h_vec ...
693 : !> \param f_vec ...
694 : !> \param s_vec ...
695 : !> \param w_vec ...
696 : !> \param nrow_local ...
697 : !> \param j ...
698 : !> \param local_history ...
699 : !> \param reorth_mat ...
700 : !> \param local_comp ...
701 : !> \param pcol_group ...
702 : ! **************************************************************************************************
703 542930 : SUBROUTINE Gram_Schmidt_ortho(h_vec, f_vec, s_vec, w_vec, nrow_local, &
704 542930 : j, local_history, reorth_mat, local_comp, pcol_group)
705 : REAL(kind=dp), DIMENSION(:) :: h_vec, f_vec, s_vec, w_vec
706 : INTEGER :: nrow_local, j
707 : REAL(kind=dp), DIMENSION(:, :) :: local_history, reorth_mat
708 : LOGICAL :: local_comp
709 : TYPE(mp_comm_type), INTENT(IN) :: pcol_group
710 :
711 : CHARACTER(LEN=*), PARAMETER :: routineN = 'Gram_Schmidt_ortho'
712 :
713 : INTEGER :: handle
714 :
715 542930 : CALL timeset(routineN, handle)
716 :
717 : ! Let's do the orthonormalization, first try the Gram Schmidt scheme
718 42760158 : h_vec = 0.0_dp; f_vec = 0.0_dp; s_vec = 0.0_dp
719 542930 : IF (local_comp) CALL dgemv('T', nrow_local, j, 1.0_dp, local_history, &
720 459287 : nrow_local, w_vec, 1, 0.0_dp, h_vec, 1)
721 9893516 : CALL pcol_group%sum(h_vec(1:j))
722 :
723 542930 : IF (local_comp) CALL dgemv('N', nrow_local, j, 1.0_dp, reorth_mat, &
724 459287 : nrow_local, h_vec, 1, 0.0_dp, f_vec, 1)
725 8515914 : f_vec(:) = w_vec(:) - f_vec(:)
726 :
727 542930 : CALL timestop(handle)
728 :
729 542930 : END SUBROUTINE Gram_Schmidt_ortho
730 :
731 : ! **************************************************************************************************
732 : !> \brief Compute the Daniel, Gragg, Kaufman and Steward correction
733 : !> \param h_vec ...
734 : !> \param f_vec ...
735 : !> \param s_vec ...
736 : !> \param nrow_local ...
737 : !> \param j ...
738 : !> \param local_history ...
739 : !> \param reorth_mat ...
740 : !> \param local_comp ...
741 : !> \param pcol_group ...
742 : ! **************************************************************************************************
743 542930 : SUBROUTINE DGKS_ortho(h_vec, f_vec, s_vec, nrow_local, j, &
744 1085860 : local_history, reorth_mat, local_comp, pcol_group)
745 : REAL(kind=dp), DIMENSION(:) :: h_vec, f_vec, s_vec
746 : INTEGER :: nrow_local, j
747 : REAL(kind=dp), DIMENSION(:, :) :: local_history, reorth_mat
748 : LOGICAL :: local_comp
749 : TYPE(mp_comm_type), INTENT(IN) :: pcol_group
750 :
751 : CHARACTER(LEN=*), PARAMETER :: routineN = 'DGKS_ortho'
752 :
753 : INTEGER :: handle
754 :
755 542930 : CALL timeset(routineN, handle)
756 :
757 542930 : IF (local_comp) CALL dgemv('T', nrow_local, j, 1.0_dp, local_history, &
758 459287 : nrow_local, f_vec, 1, 0.0_dp, s_vec, 1)
759 9893516 : CALL pcol_group%sum(s_vec(1:j))
760 542930 : IF (local_comp) CALL dgemv('N', nrow_local, j, -1.0_dp, reorth_mat, &
761 459287 : nrow_local, s_vec, 1, 1.0_dp, f_vec, 1)
762 5218223 : h_vec(1:j) = h_vec(1:j) + s_vec(1:j)
763 :
764 542930 : CALL timestop(handle)
765 :
766 542930 : END SUBROUTINE DGKS_ortho
767 :
768 : ! **************************************************************************************************
769 : !> \brief Compute the norm of a vector distributed along proc_col
770 : !> as local arrays. Always return the real part next to the complex rep.
771 : !> \param vec ...
772 : !> \param norm ...
773 : !> \param rnorm ...
774 : !> \param pcol_group ...
775 : ! **************************************************************************************************
776 854278 : SUBROUTINE compute_norms(vec, norm, rnorm, pcol_group)
777 : REAL(kind=dp), DIMENSION(:) :: vec
778 : REAL(kind=dp) :: norm
779 : REAL(dp) :: rnorm
780 : TYPE(mp_comm_type), INTENT(IN) :: pcol_group
781 :
782 : ! the norm is computed along the processor column
783 9312215 : norm = DOT_PRODUCT(vec, vec)
784 854278 : CALL pcol_group%sum(norm)
785 854278 : rnorm = SQRT(REAL(norm, dp))
786 854278 : norm = rnorm
787 :
788 854278 : END SUBROUTINE compute_norms
789 :
790 250 : END MODULE arnoldi_methods
|