Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines for calculating a complex matrix exponential.
10 : !> \author Florian Schiffmann (02.09)
11 : ! **************************************************************************************************
12 :
13 : MODULE matrix_exp
14 :
15 : USE cp_cfm_basic_linalg, ONLY: cp_cfm_scale_and_add,&
16 : cp_cfm_solve
17 : USE cp_cfm_types, ONLY: cp_cfm_create,&
18 : cp_cfm_release,&
19 : cp_cfm_set_all,&
20 : cp_cfm_to_cfm,&
21 : cp_cfm_type
22 : USE cp_fm_basic_linalg, ONLY: cp_complex_fm_gemm,&
23 : cp_fm_scale_and_add,&
24 : cp_fm_solve
25 : USE cp_fm_struct, ONLY: cp_fm_struct_double,&
26 : cp_fm_struct_release,&
27 : cp_fm_struct_type
28 : USE cp_fm_types, ONLY: cp_fm_create,&
29 : cp_fm_get_info,&
30 : cp_fm_release,&
31 : cp_fm_set_all,&
32 : cp_fm_to_fm,&
33 : cp_fm_type
34 : USE cp_log_handling, ONLY: cp_to_string
35 : USE kinds, ONLY: dp
36 : USE mathconstants, ONLY: fac,&
37 : z_one,&
38 : z_zero
39 : USE message_passing, ONLY: mp_comm_type,&
40 : mp_para_env_type
41 : USE parallel_gemm_api, ONLY: parallel_gemm
42 : #include "./base/base_uses.f90"
43 :
44 : IMPLICIT NONE
45 :
46 : PRIVATE
47 :
48 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'matrix_exp'
49 :
50 : PUBLIC :: taylor_only_imaginary, &
51 : taylor_full_complex, &
52 : exp_pade_full_complex, &
53 : exp_pade_only_imaginary, &
54 : get_nsquare_norder, &
55 : arnoldi, exp_pade_real
56 :
57 : CONTAINS
58 :
59 : ! **************************************************************************************************
60 : !> \brief specialized subroutine for purely imaginary matrix exponentials
61 : !> \param exp_H ...
62 : !> \param im_matrix ...
63 : !> \param nsquare ...
64 : !> \param ntaylor ...
65 : !> \author Florian Schiffmann (02.09)
66 : ! **************************************************************************************************
67 :
68 700 : SUBROUTINE taylor_only_imaginary(exp_H, im_matrix, nsquare, ntaylor)
69 : TYPE(cp_fm_type), DIMENSION(2) :: exp_H
70 : TYPE(cp_fm_type), INTENT(IN) :: im_matrix
71 : INTEGER, INTENT(in) :: nsquare, ntaylor
72 :
73 : CHARACTER(len=*), PARAMETER :: routineN = 'taylor_only_imaginary'
74 : REAL(KIND=dp), PARAMETER :: one = 1.0_dp, zero = 0.0_dp
75 :
76 : INTEGER :: handle, i, ndim, nloop
77 : REAL(KIND=dp) :: square_fac, Tfac, tmp
78 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
79 140 : POINTER :: local_data_im
80 : TYPE(cp_fm_type) :: T1, T2, Tres_im, Tres_re
81 :
82 140 : CALL timeset(routineN, handle)
83 :
84 140 : CALL cp_fm_get_info(im_matrix, local_data=local_data_im)
85 140 : ndim = im_matrix%matrix_struct%nrow_global
86 :
87 140 : square_fac = 1.0_dp/(2.0_dp**REAL(nsquare, dp))
88 : ! CALL cp_fm_scale(square_fac,im_matrix)
89 : CALL cp_fm_create(T1, &
90 : matrix_struct=im_matrix%matrix_struct, &
91 140 : name="T1")
92 :
93 : CALL cp_fm_create(T2, &
94 : matrix_struct=T1%matrix_struct, &
95 140 : name="T2")
96 : CALL cp_fm_create(Tres_im, &
97 : matrix_struct=T1%matrix_struct, &
98 140 : name="T3")
99 : CALL cp_fm_create(Tres_re, &
100 : matrix_struct=T1%matrix_struct, &
101 140 : name="Tres")
102 140 : tmp = 1.0_dp
103 :
104 140 : CALL cp_fm_set_all(Tres_re, zero, one)
105 140 : CALL cp_fm_set_all(Tres_im, zero, zero)
106 140 : CALL cp_fm_set_all(T1, zero, one)
107 :
108 140 : Tfac = one
109 140 : nloop = CEILING(REAL(ntaylor, dp)/2.0_dp)
110 :
111 700 : DO i = 1, nloop
112 560 : tmp = tmp*(REAL(i, dp)*2.0_dp - 1.0_dp)
113 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, square_fac, im_matrix, T1, zero, &
114 : ! CALL parallel_gemm("N","N",ndim,ndim,ndim,one,im_matrix,T1,zero,&
115 560 : T2)
116 560 : Tfac = 1._dp/tmp
117 560 : IF (MOD(i, 2) == 0) Tfac = -Tfac
118 560 : CALL cp_fm_scale_and_add(one, Tres_im, Tfac, T2)
119 560 : tmp = tmp*REAL(i, dp)*2.0_dp
120 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, square_fac, im_matrix, T2, zero, &
121 : ! CALL parallel_gemm("N","N",ndim,ndim,ndim,one,im_matrix,T2,zero,&
122 560 : T1)
123 560 : Tfac = 1._dp/tmp
124 560 : IF (MOD(i, 2) == 1) Tfac = -Tfac
125 700 : CALL cp_fm_scale_and_add(one, Tres_re, Tfac, T1)
126 :
127 : END DO
128 :
129 140 : IF (nsquare .GT. 0) THEN
130 0 : DO i = 1, nsquare
131 : CALL cp_complex_fm_gemm("N", "N", ndim, ndim, ndim, one, Tres_re, Tres_im, &
132 0 : Tres_re, Tres_im, zero, exp_H(1), exp_H(2))
133 :
134 0 : CALL cp_fm_to_fm(exp_H(1), Tres_re)
135 0 : CALL cp_fm_to_fm(exp_H(2), Tres_im)
136 : END DO
137 : ELSE
138 140 : CALL cp_fm_to_fm(Tres_re, exp_H(1))
139 140 : CALL cp_fm_to_fm(Tres_im, exp_H(2))
140 : END IF
141 :
142 140 : CALL cp_fm_release(T1)
143 140 : CALL cp_fm_release(T2)
144 140 : CALL cp_fm_release(Tres_re)
145 140 : CALL cp_fm_release(Tres_im)
146 :
147 140 : CALL timestop(handle)
148 :
149 140 : END SUBROUTINE taylor_only_imaginary
150 :
151 : ! **************************************************************************************************
152 : !> \brief subroutine for general complex matrix exponentials
153 : !> on input a separate cp_fm_type for real and complex part
154 : !> on output a size 2 cp_fm_type, first element is the real part of
155 : !> the exponential second the imaginary
156 : !> \param exp_H ...
157 : !> \param re_part ...
158 : !> \param im_part ...
159 : !> \param nsquare ...
160 : !> \param ntaylor ...
161 : !> \author Florian Schiffmann (02.09)
162 : ! **************************************************************************************************
163 :
164 2250 : SUBROUTINE taylor_full_complex(exp_H, re_part, im_part, nsquare, ntaylor)
165 : TYPE(cp_fm_type), DIMENSION(2) :: exp_H
166 : TYPE(cp_fm_type), INTENT(IN) :: re_part, im_part
167 : INTEGER, INTENT(in) :: nsquare, ntaylor
168 :
169 : CHARACTER(len=*), PARAMETER :: routineN = 'taylor_full_complex'
170 :
171 : COMPLEX(KIND=dp) :: Tfac
172 : INTEGER :: handle, i, ndim
173 : REAL(KIND=dp) :: square_fac, tmp
174 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
175 450 : POINTER :: local_data_im, local_data_re
176 : TYPE(cp_cfm_type) :: T1, T2, T3, Tres
177 :
178 450 : CALL timeset(routineN, handle)
179 450 : CALL cp_fm_get_info(re_part, local_data=local_data_re)
180 450 : CALL cp_fm_get_info(im_part, local_data=local_data_im)
181 450 : ndim = re_part%matrix_struct%nrow_global
182 :
183 : CALL cp_cfm_create(T1, &
184 : matrix_struct=re_part%matrix_struct, &
185 450 : name="T1")
186 :
187 450 : square_fac = 2.0_dp**REAL(nsquare, dp)
188 :
189 38538 : T1%local_data = CMPLX(local_data_re/square_fac, local_data_im/square_fac, KIND=dp)
190 :
191 : CALL cp_cfm_create(T2, &
192 : matrix_struct=T1%matrix_struct, &
193 450 : name="T2")
194 : CALL cp_cfm_create(T3, &
195 : matrix_struct=T1%matrix_struct, &
196 450 : name="T3")
197 : CALL cp_cfm_create(Tres, &
198 : matrix_struct=T1%matrix_struct, &
199 450 : name="Tres")
200 450 : tmp = 1.0_dp
201 450 : CALL cp_cfm_set_all(Tres, z_zero, z_one)
202 450 : CALL cp_cfm_set_all(T2, z_zero, z_one)
203 450 : Tfac = z_one
204 :
205 3568 : DO i = 1, ntaylor
206 3118 : tmp = tmp*REAL(i, dp)
207 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, T1, T2, z_zero, &
208 3118 : T3)
209 3118 : Tfac = CMPLX(1._dp/tmp, 0.0_dp, kind=dp)
210 3118 : CALL cp_cfm_scale_and_add(z_one, Tres, Tfac, T3)
211 3568 : CALL cp_cfm_to_cfm(T3, T2)
212 : END DO
213 :
214 450 : IF (nsquare .GT. 0) THEN
215 376 : DO i = 1, nsquare
216 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, Tres, Tres, z_zero, &
217 194 : T2)
218 376 : CALL cp_cfm_to_cfm(T2, Tres)
219 : END DO
220 : END IF
221 :
222 38538 : exp_H(1)%local_data = REAL(Tres%local_data, KIND=dp)
223 38538 : exp_H(2)%local_data = AIMAG(Tres%local_data)
224 :
225 450 : CALL cp_cfm_release(T1)
226 450 : CALL cp_cfm_release(T2)
227 450 : CALL cp_cfm_release(T3)
228 450 : CALL cp_cfm_release(Tres)
229 450 : CALL timestop(handle)
230 :
231 450 : END SUBROUTINE taylor_full_complex
232 :
233 : ! **************************************************************************************************
234 : !> \brief optimization function for pade/taylor order and number of squaring steps
235 : !> \param norm ...
236 : !> \param nsquare ...
237 : !> \param norder ...
238 : !> \param eps_exp ...
239 : !> \param method ...
240 : !> \param do_emd ...
241 : !> \author Florian Schiffmann (02.09)
242 : ! **************************************************************************************************
243 4506 : SUBROUTINE get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
244 :
245 : REAL(dp), INTENT(in) :: norm
246 : INTEGER, INTENT(out) :: nsquare, norder
247 : REAL(dp), INTENT(in) :: eps_exp
248 : INTEGER, INTENT(in) :: method
249 : LOGICAL, INTENT(in) :: do_emd
250 :
251 : INTEGER :: cost, i, iscale, orders(3), p, &
252 : prev_cost, q
253 : LOGICAL :: new_scale
254 : REAL(dp) :: D, eval, myval, N, scaleD, scaleN
255 :
256 4506 : orders(:) = (/12, 12, 12/)
257 4506 : IF (method == 2) THEN
258 18426 : DO iscale = 0, 12
259 17758 : new_scale = .FALSE.
260 17758 : eval = norm/(2.0_dp**REAL(iscale, dp))
261 116706 : DO q = 1, 12
262 296996 : DO p = MAX(1, q - 1), q
263 : IF (p > q) EXIT
264 : D = 1.0_dp
265 : N = 1.0_dp
266 1397486 : DO i = 1, q
267 1199438 : IF (i .LE. p) scaleN = fac(p + q - i)*fac(p)/(fac(p + q)*fac(i)*fac(p - i))
268 1199438 : scaleD = (-1.0)**i*fac(p + q - i)*fac(q)/(fac(p + q)*fac(i)*fac(q - i))
269 1199438 : IF (i .LE. p) N = N + scaleN*eval**i
270 1397486 : D = D + scaleD*eval**i
271 : END DO
272 296996 : IF (ABS((EXP(norm) - (N/D)**(2.0_dp**iscale))/MAX(1.0_dp, EXP(norm))) .LE. eps_exp) THEN
273 10548 : IF (do_emd) THEN
274 70 : cost = iscale + q
275 70 : prev_cost = orders(1) + orders(2)
276 : ELSE
277 10478 : cost = iscale + CEILING(REAL(q, dp)/3.0_dp)
278 10478 : prev_cost = orders(1) + CEILING(REAL(orders(2), dp)/3.0_dp)
279 : END IF
280 10548 : IF (cost .LT. prev_cost) THEN
281 17344 : orders(:) = (/iscale, q, p/)
282 4336 : myval = (N/D)**(2.0_dp**iscale)
283 : END IF
284 : new_scale = .TRUE.
285 : EXIT
286 : END IF
287 : END DO
288 106158 : IF (new_scale) EXIT
289 : END DO
290 18426 : IF (iscale .GE. orders(1) + orders(2) .AND. new_scale) EXIT
291 : END DO
292 170 : ELSE IF (method == 1) THEN
293 170 : q = 0
294 170 : eval = norm
295 1324 : DO iscale = 0, 6
296 1166 : new_scale = .FALSE.
297 1166 : IF (iscale .GE. 1) eval = norm/(2.0_dp**REAL(iscale, dp))
298 6096 : DO p = 1, 20
299 27370 : D = 1.0_dp
300 : N = 1.0_dp
301 27370 : DO i = 1, p
302 21278 : scaleN = 1.0_dp/fac(i)
303 27370 : N = N + scaleN*(eval**REAL(i, dp))
304 : END DO
305 6096 : IF (ABS((EXP(norm) - N**(2.0_dp**REAL(iscale, dp)))/MAX(1.0_dp, EXP(norm))) .LE. eps_exp) THEN
306 1162 : IF (do_emd) THEN
307 304 : cost = iscale + p
308 304 : prev_cost = orders(1) + orders(2)
309 : ELSE
310 858 : cost = iscale + CEILING(REAL(p, dp)/3.0_dp)
311 858 : prev_cost = orders(1) + CEILING(REAL(orders(2), dp)/3.0_dp)
312 : END IF
313 1162 : IF (cost .LT. prev_cost) THEN
314 808 : orders(:) = (/iscale, p, 0/)
315 202 : myval = (N)**(2.0_dp**iscale)
316 : END IF
317 : new_scale = .TRUE.
318 : EXIT
319 : END IF
320 : END DO
321 1324 : IF (iscale .GE. orders(1) + orders(2) .AND. new_scale) EXIT
322 : END DO
323 : END IF
324 :
325 4506 : nsquare = orders(1)
326 4506 : norder = orders(2)
327 :
328 4506 : END SUBROUTINE get_nsquare_norder
329 :
330 : ! **************************************************************************************************
331 : !> \brief exponential of a complex matrix,
332 : !> calculated using pade approximation together with scaling and squaring
333 : !> \param exp_H ...
334 : !> \param re_part ...
335 : !> \param im_part ...
336 : !> \param nsquare ...
337 : !> \param npade ...
338 : !> \author Florian Schiffmann (02.09)
339 : ! **************************************************************************************************
340 :
341 2848 : SUBROUTINE exp_pade_full_complex(exp_H, re_part, im_part, nsquare, npade)
342 : TYPE(cp_fm_type), DIMENSION(2) :: exp_H
343 : TYPE(cp_fm_type), INTENT(IN) :: re_part, im_part
344 : INTEGER, INTENT(in) :: nsquare, npade
345 :
346 : CHARACTER(len=*), PARAMETER :: routineN = 'exp_pade_full_complex'
347 :
348 : COMPLEX(KIND=dp) :: scaleD, scaleN
349 : INTEGER :: handle, i, ldim, ndim, p, q
350 : REAL(KIND=dp) :: square_fac, tmp
351 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
352 356 : POINTER :: local_data_im, local_data_re
353 : TYPE(cp_cfm_type) :: Dpq, fin_p, T1
354 1068 : TYPE(cp_cfm_type), DIMENSION(2) :: mult_p
355 : TYPE(cp_cfm_type), TARGET :: Npq, T2, Tres
356 :
357 356 : p = npade
358 356 : q = npade
359 :
360 356 : CALL timeset(routineN, handle)
361 : CALL cp_fm_get_info(re_part, local_data=local_data_re, ncol_local=ldim, &
362 356 : nrow_global=ndim)
363 356 : CALL cp_fm_get_info(im_part, local_data=local_data_im)
364 :
365 : CALL cp_cfm_create(Dpq, &
366 : matrix_struct=re_part%matrix_struct, &
367 356 : name="Dpq")
368 :
369 356 : square_fac = 2.0_dp**REAL(nsquare, dp)
370 :
371 : CALL cp_cfm_create(T1, &
372 : matrix_struct=Dpq%matrix_struct, &
373 356 : name="T1")
374 :
375 : CALL cp_cfm_create(T2, &
376 : matrix_struct=T1%matrix_struct, &
377 356 : name="T2")
378 : CALL cp_cfm_create(Npq, &
379 : matrix_struct=T1%matrix_struct, &
380 356 : name="Npq")
381 : CALL cp_cfm_create(Tres, &
382 : matrix_struct=T1%matrix_struct, &
383 356 : name="Tres")
384 :
385 4218 : DO i = 1, ldim
386 33761 : T2%local_data(:, i) = CMPLX(local_data_re(:, i)/square_fac, local_data_im(:, i)/square_fac, KIND=dp)
387 : END DO
388 356 : CALL cp_cfm_to_cfm(T2, T1)
389 356 : mult_p(1) = T2
390 356 : mult_p(2) = Tres
391 356 : tmp = 1.0_dp
392 356 : CALL cp_cfm_set_all(Npq, z_zero, z_one)
393 356 : CALL cp_cfm_set_all(Dpq, z_zero, z_one)
394 :
395 356 : CALL cp_cfm_scale_and_add(z_one, Npq, z_one*0.5_dp, T2)
396 356 : CALL cp_cfm_scale_and_add(z_one, Dpq, -z_one*0.5_dp, T2)
397 :
398 356 : IF (npade .GT. 2) THEN
399 1392 : DO i = 2, npade
400 1044 : IF (i .LE. p) scaleN = CMPLX(fac(p + q - i)*fac(p)/(fac(p + q)*fac(i)*fac(p - i)), 0.0_dp, kind=dp)
401 1044 : scaleD = CMPLX((-1.0_dp)**i*fac(p + q - i)*fac(q)/(fac(p + q)*fac(i)*fac(q - i)), 0.0_dp, kind=dp)
402 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, T1, mult_p(MOD(i, 2) + 1), z_zero, &
403 1044 : mult_p(MOD(i + 1, 2) + 1))
404 1044 : IF (i .LE. p) CALL cp_cfm_scale_and_add(z_one, Npq, scaleN, mult_p(MOD(i + 1, 2) + 1))
405 1392 : IF (i .LE. q) CALL cp_cfm_scale_and_add(z_one, Dpq, scaleD, mult_p(MOD(i + 1, 2) + 1))
406 : END DO
407 : END IF
408 :
409 356 : CALL cp_cfm_solve(Dpq, Npq)
410 :
411 356 : mult_p(2) = Npq
412 356 : mult_p(1) = Tres
413 356 : IF (nsquare .GT. 0) THEN
414 0 : DO i = 1, nsquare
415 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, mult_p(MOD(i, 2) + 1), mult_p(MOD(i, 2) + 1), z_zero, &
416 0 : mult_p(MOD(i + 1, 2) + 1))
417 0 : fin_p = mult_p(MOD(i + 1, 2) + 1)
418 : END DO
419 : ELSE
420 356 : fin_p = Npq
421 : END IF
422 4218 : DO i = 1, ldim
423 33405 : exp_H(1)%local_data(:, i) = REAL(fin_p%local_data(:, i), KIND=dp)
424 33761 : exp_H(2)%local_data(:, i) = AIMAG(fin_p%local_data(:, i))
425 : END DO
426 :
427 356 : CALL cp_cfm_release(Npq)
428 356 : CALL cp_cfm_release(Dpq)
429 356 : CALL cp_cfm_release(T1)
430 356 : CALL cp_cfm_release(T2)
431 356 : CALL cp_cfm_release(Tres)
432 356 : CALL timestop(handle)
433 :
434 712 : END SUBROUTINE exp_pade_full_complex
435 :
436 : ! **************************************************************************************************
437 : !> \brief exponential of a complex matrix,
438 : !> calculated using pade approximation together with scaling and squaring
439 : !> \param exp_H ...
440 : !> \param im_part ...
441 : !> \param nsquare ...
442 : !> \param npade ...
443 : !> \author Florian Schiffmann (02.09)
444 : ! **************************************************************************************************
445 :
446 2656 : SUBROUTINE exp_pade_only_imaginary(exp_H, im_part, nsquare, npade)
447 : TYPE(cp_fm_type), DIMENSION(2) :: exp_H
448 : TYPE(cp_fm_type), INTENT(IN) :: im_part
449 : INTEGER, INTENT(in) :: nsquare, npade
450 :
451 : CHARACTER(len=*), PARAMETER :: routineN = 'exp_pade_only_imaginary'
452 : REAL(KIND=dp), PARAMETER :: rone = 1.0_dp, rzero = 0.0_dp
453 :
454 : COMPLEX(KIND=dp) :: scaleD, scaleN
455 : INTEGER :: handle, i, j, k, ldim, ndim, p, q
456 : REAL(KIND=dp) :: my_fac, square_fac
457 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
458 332 : POINTER :: local_data_im
459 : TYPE(cp_cfm_type) :: Dpq, fin_p
460 996 : TYPE(cp_cfm_type), DIMENSION(2) :: cmult_p
461 : TYPE(cp_cfm_type), TARGET :: Npq, T1
462 : TYPE(cp_fm_type) :: T2, Tres
463 :
464 332 : CALL timeset(routineN, handle)
465 332 : p = npade
466 332 : q = npade !p==q seems to be necessary for the rest of the code
467 :
468 332 : CALL cp_fm_get_info(im_part, local_data=local_data_im, ncol_local=ldim, nrow_global=ndim)
469 332 : square_fac = 1.0_dp/(2.0_dp**REAL(nsquare, dp))
470 :
471 : CALL cp_cfm_create(Dpq, &
472 : matrix_struct=im_part%matrix_struct, &
473 332 : name="Dpq")
474 :
475 : CALL cp_cfm_create(Npq, &
476 : matrix_struct=Dpq%matrix_struct, &
477 332 : name="Npq")
478 :
479 : CALL cp_cfm_create(T1, &
480 : matrix_struct=Dpq%matrix_struct, &
481 332 : name="T1")
482 :
483 : CALL cp_fm_create(T2, &
484 : matrix_struct=T1%matrix_struct, &
485 332 : name="T2")
486 :
487 : CALL cp_fm_create(Tres, &
488 : matrix_struct=T1%matrix_struct, &
489 332 : name="Tres")
490 :
491 : ! DO i=1,ldim
492 : ! local_data_im(:,i)=local_data_im(:,i)/square_fac
493 : ! END DO
494 :
495 332 : CALL cp_fm_to_fm(im_part, T2)
496 :
497 332 : CALL cp_cfm_set_all(Npq, z_zero, z_one)
498 332 : CALL cp_cfm_set_all(Dpq, z_zero, z_one)
499 :
500 5006 : DO i = 1, ldim
501 47633 : Npq%local_data(:, i) = Npq%local_data(:, i) + CMPLX(rzero, 0.5_dp*square_fac*local_data_im(:, i), dp)
502 47965 : Dpq%local_data(:, i) = Dpq%local_data(:, i) - CMPLX(rzero, 0.5_dp*square_fac*local_data_im(:, i), dp)
503 : END DO
504 :
505 332 : IF (npade .GT. 2) THEN
506 960 : DO j = 1, FLOOR(npade/2.0_dp)
507 640 : i = 2*j
508 640 : my_fac = (-rone)**j
509 640 : IF (i .LE. p) scaleN = CMPLX(my_fac*fac(p + q - i)*fac(p)/(fac(p + q)*fac(i)*fac(p - i)), 0.0_dp, dp)
510 640 : scaleD = CMPLX(my_fac*fac(p + q - i)*fac(q)/(fac(p + q)*fac(i)*fac(q - i)), 0.0_dp, dp)
511 640 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, square_fac, im_part, T2, rzero, Tres)
512 :
513 9748 : DO k = 1, ldim
514 93826 : Npq%local_data(:, k) = Npq%local_data(:, k) + scaleN*Tres%local_data(:, k)
515 94466 : Dpq%local_data(:, k) = Dpq%local_data(:, k) + scaleD*Tres%local_data(:, k)
516 : END DO
517 :
518 960 : IF (2*j + 1 .LE. q) THEN
519 320 : i = 2*j + 1
520 320 : IF (i .LE. p) scaleN = CMPLX(my_fac*fac(p + q - i)*fac(p)/(fac(p + q)*fac(i)*fac(p - i)), rzero, dp)
521 320 : scaleD = CMPLX(-my_fac*fac(p + q - i)*fac(q)/(fac(p + q)*fac(i)*fac(q - i)), rzero, dp)
522 320 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, square_fac, im_part, Tres, rzero, T2)
523 :
524 4874 : DO k = 1, ldim
525 46913 : Npq%local_data(:, k) = Npq%local_data(:, k) + scaleN*CMPLX(rzero, T2%local_data(:, k), dp)
526 47233 : Dpq%local_data(:, k) = Dpq%local_data(:, k) + scaleD*CMPLX(rzero, T2%local_data(:, k), dp)
527 : END DO
528 : END IF
529 : END DO
530 : END IF
531 :
532 332 : CALL cp_cfm_solve(Dpq, Npq)
533 :
534 332 : cmult_p(2) = Npq
535 332 : cmult_p(1) = T1
536 332 : IF (nsquare .GT. 0) THEN
537 0 : DO i = 1, nsquare
538 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, z_one, cmult_p(MOD(i, 2) + 1), cmult_p(MOD(i, 2) + 1), z_zero, &
539 0 : cmult_p(MOD(i + 1, 2) + 1))
540 0 : fin_p = cmult_p(MOD(i + 1, 2) + 1)
541 : END DO
542 : ELSE
543 332 : fin_p = Npq
544 : END IF
545 :
546 5006 : DO k = 1, ldim
547 47633 : exp_H(1)%local_data(:, k) = REAL(fin_p%local_data(:, k), KIND=dp)
548 47965 : exp_H(2)%local_data(:, k) = AIMAG(fin_p%local_data(:, k))
549 : END DO
550 :
551 332 : CALL cp_cfm_release(Npq)
552 332 : CALL cp_cfm_release(Dpq)
553 332 : CALL cp_cfm_release(T1)
554 332 : CALL cp_fm_release(T2)
555 332 : CALL cp_fm_release(Tres)
556 332 : CALL timestop(handle)
557 332 : END SUBROUTINE exp_pade_only_imaginary
558 :
559 : ! **************************************************************************************************
560 : !> \brief exponential of a real matrix,
561 : !> calculated using pade approximation together with scaling and squaring
562 : !> \param exp_H ...
563 : !> \param matrix ...
564 : !> \param nsquare ...
565 : !> \param npade ...
566 : !> \author Florian Schiffmann (02.09)
567 : ! **************************************************************************************************
568 :
569 34368 : SUBROUTINE exp_pade_real(exp_H, matrix, nsquare, npade)
570 : TYPE(cp_fm_type), INTENT(IN) :: exp_H, matrix
571 : INTEGER, INTENT(in) :: nsquare, npade
572 :
573 : CHARACTER(len=*), PARAMETER :: routineN = 'exp_pade_real'
574 : REAL(KIND=dp), PARAMETER :: one = 1.0_dp, zero = 0.0_dp
575 :
576 : INTEGER :: handle, i, j, k, ldim, ndim, p, q
577 : REAL(KIND=dp) :: my_fac, scaleD, scaleN, square_fac
578 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
579 4296 : POINTER :: local_data
580 : TYPE(cp_fm_type) :: Dpq, fin_p
581 12888 : TYPE(cp_fm_type), DIMENSION(2) :: mult_p
582 : TYPE(cp_fm_type), TARGET :: Npq, T1, T2, Tres
583 :
584 4296 : CALL timeset(routineN, handle)
585 4296 : p = npade
586 4296 : q = npade !p==q seems to be necessary for the rest of the code
587 :
588 4296 : CALL cp_fm_get_info(matrix, local_data=local_data, ncol_local=ldim, nrow_global=ndim)
589 4296 : square_fac = 2.0_dp**REAL(nsquare, dp)
590 :
591 : CALL cp_fm_create(Dpq, &
592 : matrix_struct=matrix%matrix_struct, &
593 4296 : name="Dpq")
594 :
595 : CALL cp_fm_create(Npq, &
596 : matrix_struct=Dpq%matrix_struct, &
597 4296 : name="Npq")
598 :
599 : CALL cp_fm_create(T1, &
600 : matrix_struct=Dpq%matrix_struct, &
601 4296 : name="T1")
602 :
603 : CALL cp_fm_create(T2, &
604 : matrix_struct=T1%matrix_struct, &
605 4296 : name="T2")
606 :
607 : CALL cp_fm_create(Tres, &
608 : matrix_struct=T1%matrix_struct, &
609 4296 : name="Tres")
610 :
611 62878 : DO i = 1, ldim
612 999496 : T2%local_data(:, i) = local_data(:, i)/square_fac
613 : END DO
614 :
615 4296 : CALL cp_fm_to_fm(T2, T1)
616 4296 : CALL cp_fm_set_all(Npq, zero, one)
617 4296 : CALL cp_fm_set_all(Dpq, zero, one)
618 :
619 62878 : DO i = 1, ldim
620 995200 : Npq%local_data(:, i) = Npq%local_data(:, i) + 0.5_dp*local_data(:, i)
621 531187 : Dpq%local_data(:, i) = Dpq%local_data(:, i) - 0.5_dp*local_data(:, i)
622 : END DO
623 :
624 4296 : mult_p(1) = T2
625 4296 : mult_p(2) = Tres
626 :
627 4296 : IF (npade .GE. 2) THEN
628 5686 : DO j = 2, npade
629 4106 : my_fac = (-1.0_dp)**j
630 4106 : scaleN = fac(p + q - j)*fac(p)/(fac(p + q)*fac(j)*fac(p - j))
631 4106 : scaleD = my_fac*fac(p + q - j)*fac(q)/(fac(p + q)*fac(j)*fac(q - j))
632 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, one, mult_p(MOD(j, 2) + 1), T1, &
633 4106 : zero, mult_p(MOD(j + 1, 2) + 1))
634 :
635 57542 : DO k = 1, ldim
636 500564 : Npq%local_data(:, k) = Npq%local_data(:, k) + scaleN*mult_p(MOD(j + 1, 2) + 1)%local_data(:, k)
637 504670 : Dpq%local_data(:, k) = Dpq%local_data(:, k) + scaleD*mult_p(MOD(j + 1, 2) + 1)%local_data(:, k)
638 : END DO
639 : END DO
640 : END IF
641 :
642 4296 : CALL cp_fm_solve(Dpq, Npq)
643 :
644 4296 : mult_p(2) = Npq
645 4296 : mult_p(1) = T1
646 4296 : IF (nsquare .GT. 0) THEN
647 0 : DO i = 1, nsquare
648 : CALL parallel_gemm("N", "N", ndim, ndim, ndim, one, mult_p(MOD(i, 2) + 1), mult_p(MOD(i, 2) + 1), zero, &
649 0 : mult_p(MOD(i + 1, 2) + 1))
650 : END DO
651 0 : fin_p = mult_p(MOD(nsquare + 1, 2) + 1)
652 : ELSE
653 4296 : fin_p = Npq
654 : END IF
655 :
656 62878 : DO k = 1, ldim
657 531187 : exp_H%local_data(:, k) = fin_p%local_data(:, k)
658 : END DO
659 :
660 4296 : CALL cp_fm_release(Npq)
661 4296 : CALL cp_fm_release(Dpq)
662 4296 : CALL cp_fm_release(T1)
663 4296 : CALL cp_fm_release(T2)
664 4296 : CALL cp_fm_release(Tres)
665 4296 : CALL timestop(handle)
666 :
667 8592 : END SUBROUTINE exp_pade_real
668 :
669 : ! ***************************************************************************************************
670 : !> \brief exponential of a complex matrix,
671 : !> calculated using arnoldi subspace method (directly applies to the MOs)
672 : !> \param mos_old ...
673 : !> \param mos_new ...
674 : !> \param eps_exp ...
675 : !> \param Hre ...
676 : !> \param Him ...
677 : !> \param mos_next ...
678 : !> \param narn_old ...
679 : !> \author Florian Schiffmann (02.09)
680 : ! **************************************************************************************************
681 :
682 744 : SUBROUTINE arnoldi(mos_old, mos_new, eps_exp, Hre, Him, mos_next, narn_old)
683 :
684 : TYPE(cp_fm_type), DIMENSION(2) :: mos_old, mos_new
685 : REAL(KIND=dp), INTENT(in) :: eps_exp
686 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: Hre
687 : TYPE(cp_fm_type), INTENT(IN) :: Him
688 : TYPE(cp_fm_type), DIMENSION(2), OPTIONAL :: mos_next
689 : INTEGER, INTENT(inout) :: narn_old
690 :
691 : CHARACTER(len=*), PARAMETER :: routineN = 'arnoldi'
692 : REAL(KIND=dp), PARAMETER :: rone = 1.0_dp, rzero = 0.0_dp
693 :
694 : INTEGER :: handle, i, icol_local, idim, info, j, l, &
695 : mydim, nao, narnoldi, ncol_local, &
696 : newdim, nmo, npade, pade_step
697 744 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
698 744 : INTEGER, DIMENSION(:), POINTER :: col_indices, col_procs
699 : LOGICAL :: convergence, double_col, double_row
700 744 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: last_norm, norm1, results
701 744 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: D, mat1, mat2, mat3, N
702 744 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: H_approx, H_approx_save
703 : REAL(KIND=dp) :: conv_norm, prefac, scaleD, scaleN
704 : TYPE(cp_fm_struct_type), POINTER :: mo_struct, newstruct
705 744 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: V_mats
706 : TYPE(mp_comm_type) :: col_group
707 : TYPE(mp_para_env_type), POINTER :: para_env
708 :
709 744 : CALL timeset(routineN, handle)
710 744 : para_env => mos_new(1)%matrix_struct%para_env
711 :
712 : CALL cp_fm_get_info(mos_old(1), ncol_local=ncol_local, col_indices=col_indices, &
713 744 : nrow_global=nao, ncol_global=nmo, matrix_struct=mo_struct)
714 744 : narnoldi = MIN(18, nao)
715 :
716 2232 : ALLOCATE (results(ncol_local))
717 1488 : ALLOCATE (norm1(ncol_local))
718 13632 : ALLOCATE (V_mats(narnoldi + 1))
719 1488 : ALLOCATE (last_norm(ncol_local))
720 3720 : ALLOCATE (H_approx(narnoldi, narnoldi, ncol_local))
721 2976 : ALLOCATE (H_approx_save(narnoldi, narnoldi, ncol_local))
722 744 : col_procs => mo_struct%context%blacs2mpi(:, mo_struct%context%mepos(2))
723 2232 : CALL col_group%from_reordering(para_env, col_procs)
724 :
725 744 : double_col = .TRUE.
726 744 : double_row = .FALSE.
727 744 : CALL cp_fm_struct_double(newstruct, mo_struct, mo_struct%context, double_col, double_row)
728 735882 : H_approx_save = rzero
729 :
730 12144 : DO i = 1, narnoldi + 1
731 : CALL cp_fm_create(V_mats(i), matrix_struct=newstruct, &
732 12144 : name="V_mat"//cp_to_string(i))
733 : END DO
734 744 : CALL cp_fm_get_info(V_mats(1), ncol_global=newdim)
735 :
736 4006 : norm1 = 0.0_dp
737 744 : !$OMP PARALLEL DO PRIVATE(icol_local) DEFAULT(NONE) SHARED(V_mats,norm1,mos_old,ncol_local)
738 : DO icol_local = 1, ncol_local
739 : V_mats(1)%local_data(:, icol_local) = mos_old(1)%local_data(:, icol_local)
740 : V_mats(1)%local_data(:, icol_local + ncol_local) = mos_old(2)%local_data(:, icol_local)
741 : norm1(icol_local) = SUM(V_mats(1)%local_data(:, icol_local)**2) &
742 : + SUM(V_mats(1)%local_data(:, icol_local + ncol_local)**2)
743 : END DO
744 :
745 744 : CALL col_group%sum(norm1)
746 : !!! normalize the mo vectors
747 4006 : norm1(:) = SQRT(norm1(:))
748 :
749 744 : !$OMP PARALLEL DO PRIVATE(icol_local) DEFAULT(NONE) SHARED(V_mats,norm1,ncol_local)
750 : DO icol_local = 1, ncol_local
751 : V_mats(1)%local_data(:, icol_local) = V_mats(1)%local_data(:, icol_local)/norm1(icol_local)
752 : V_mats(1)%local_data(:, icol_local + ncol_local) = &
753 : V_mats(1)%local_data(:, icol_local + ncol_local)/norm1(icol_local)
754 : END DO
755 :
756 : ! arnoldi subspace procedure to get H_approx
757 4994 : DO i = 2, narnoldi + 1
758 : !Be careful, imaginary matrix multiplied with complex. Unfortunately requires a swap of arrays afterwards
759 4994 : CALL parallel_gemm("N", "N", nao, newdim, nao, 1.0_dp, Him, V_mats(i - 1), 0.0_dp, V_mats(i))
760 :
761 4994 : !$OMP PARALLEL DO PRIVATE(icol_local) DEFAULT(NONE) SHARED(mos_new,V_mats,ncol_local,i)
762 : DO icol_local = 1, ncol_local
763 : mos_new(1)%local_data(:, icol_local) = V_mats(i)%local_data(:, icol_local)
764 : V_mats(i)%local_data(:, icol_local) = -V_mats(i)%local_data(:, icol_local + ncol_local)
765 : V_mats(i)%local_data(:, icol_local + ncol_local) = mos_new(1)%local_data(:, icol_local)
766 : END DO
767 :
768 4994 : IF (PRESENT(Hre)) THEN
769 3118 : CALL parallel_gemm("N", "N", nao, newdim, nao, 1.0_dp, Hre, V_mats(i - 1), 1.0_dp, V_mats(i))
770 : END IF
771 :
772 24586 : DO l = 1, i - 1
773 19592 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(results,V_mats,ncol_local,l,i)
774 : DO icol_local = 1, ncol_local
775 : results(icol_local) = SUM(V_mats(l)%local_data(:, icol_local)*V_mats(i)%local_data(:, icol_local)) + &
776 : SUM(V_mats(l)%local_data(:, icol_local + ncol_local)* &
777 : V_mats(i)%local_data(:, icol_local + ncol_local))
778 : END DO
779 :
780 19592 : CALL col_group%sum(results)
781 :
782 24586 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(H_approx_save,V_mats,ncol_local,l,i,results)
783 : DO icol_local = 1, ncol_local
784 : H_approx_save(l, i - 1, icol_local) = results(icol_local)
785 : V_mats(i)%local_data(:, icol_local) = V_mats(i)%local_data(:, icol_local) - &
786 : results(icol_local)*V_mats(l)%local_data(:, icol_local)
787 : V_mats(i)%local_data(:, icol_local + ncol_local) = &
788 : V_mats(i)%local_data(:, icol_local + ncol_local) - &
789 : results(icol_local)*V_mats(l)%local_data(:, icol_local + ncol_local)
790 : END DO
791 : END DO
792 :
793 4994 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(ncol_local,V_mats,results,i)
794 : DO icol_local = 1, ncol_local
795 : results(icol_local) = SUM(V_mats(i)%local_data(:, icol_local)**2) + &
796 : SUM(V_mats(i)%local_data(:, icol_local + ncol_local)**2)
797 : END DO
798 :
799 4994 : CALL col_group%sum(results)
800 :
801 4994 : IF (i .LE. narnoldi) THEN
802 :
803 4994 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(H_approx_save,last_norm,V_mats,ncol_local,i,results)
804 : DO icol_local = 1, ncol_local
805 : H_approx_save(i, i - 1, icol_local) = SQRT(results(icol_local))
806 : last_norm(icol_local) = SQRT(results(icol_local))
807 : V_mats(i)%local_data(:, icol_local) = V_mats(i)%local_data(:, icol_local)/SQRT(results(icol_local))
808 : V_mats(i)%local_data(:, icol_local + ncol_local) = &
809 : V_mats(i)%local_data(:, icol_local + ncol_local)/SQRT(results(icol_local))
810 : END DO
811 : ELSE
812 0 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(ncol_local,last_norm,results)
813 : DO icol_local = 1, ncol_local
814 : last_norm(icol_local) = SQRT(results(icol_local))
815 : END DO
816 : END IF
817 :
818 5178290 : H_approx(:, :, :) = H_approx_save
819 :
820 : ! PADE approximation for exp(H_approx), everything is done locally
821 :
822 4994 : convergence = .FALSE.
823 4994 : IF (i .GE. narn_old) THEN
824 1630 : npade = 9
825 1630 : mydim = MIN(i, narnoldi)
826 4890 : ALLOCATE (ipivot(mydim))
827 6520 : ALLOCATE (mat1(mydim, mydim))
828 4890 : ALLOCATE (mat2(mydim, mydim))
829 4890 : ALLOCATE (mat3(mydim, mydim))
830 4890 : ALLOCATE (N(mydim, mydim))
831 4890 : ALLOCATE (D(mydim, mydim))
832 8718 : DO icol_local = 1, ncol_local
833 56420 : DO idim = 1, mydim
834 412184 : DO j = 1, mydim
835 355764 : mat1(idim, j) = H_approx(idim, j, icol_local)/16.0_dp
836 405096 : mat3(idim, j) = mat1(idim, j)
837 : END DO
838 : END DO
839 412184 : N = 0.0_dp
840 412184 : D = 0.0_dp
841 56420 : DO idim = 1, mydim
842 49332 : N(idim, idim) = rone
843 56420 : D(idim, idim) = rone
844 : END DO
845 412184 : N(:, :) = N + 0.5_dp*mat1
846 412184 : D(:, :) = D - 0.5_dp*mat1
847 : pade_step = 1
848 35440 : DO idim = 1, 4
849 28352 : pade_step = pade_step + 1
850 : CALL dgemm("N", 'N', mydim, mydim, mydim, rone, mat1(1, 1), &
851 28352 : mydim, mat3(1, 1), mydim, rzero, mat2(1, 1), mydim)
852 : scaleN = REAL(fac(2*npade - pade_step)*fac(npade)/ &
853 28352 : (fac(2*npade)*fac(pade_step)*fac(npade - pade_step)), dp)
854 : scaleD = REAL((-1.0_dp)**pade_step*fac(2*npade - pade_step)*fac(npade)/ &
855 28352 : (fac(2*npade)*fac(pade_step)*fac(npade - pade_step)), dp)
856 1648736 : N(:, :) = N + scaleN*mat2
857 1648736 : D(:, :) = D + scaleD*mat2
858 28352 : pade_step = pade_step + 1
859 : CALL dgemm("N", 'N', mydim, mydim, mydim, rone, mat2(1, 1), &
860 28352 : mydim, mat1(1, 1), mydim, rzero, mat3(1, 1), mydim)
861 : scaleN = REAL(fac(2*npade - pade_step)*fac(npade)/ &
862 28352 : (fac(2*npade)*fac(pade_step)*fac(npade - pade_step)), dp)
863 : scaleD = REAL((-1.0_dp)**pade_step*fac(2*npade - pade_step)*fac(npade)/ &
864 28352 : (fac(2*npade)*fac(pade_step)*fac(npade - pade_step)), dp)
865 1648736 : N(:, :) = N + scaleN*mat3
866 1655824 : D(:, :) = D + scaleD*mat3
867 : END DO
868 :
869 7088 : CALL dgetrf(mydim, mydim, D(1, 1), mydim, ipivot, info)
870 7088 : CALL dgetrs("N", mydim, mydim, D(1, 1), mydim, ipivot, N, mydim, info)
871 7088 : CALL dgemm("N", 'N', mydim, mydim, mydim, rone, N(1, 1), mydim, N(1, 1), mydim, rzero, mat1(1, 1), mydim)
872 7088 : CALL dgemm("N", 'N', mydim, mydim, mydim, rone, mat1(1, 1), mydim, mat1(1, 1), mydim, rzero, N(1, 1), mydim)
873 7088 : CALL dgemm("N", 'N', mydim, mydim, mydim, rone, N(1, 1), mydim, N(1, 1), mydim, rzero, mat1(1, 1), mydim)
874 7088 : CALL dgemm("N", 'N', mydim, mydim, mydim, rone, mat1(1, 1), mydim, mat1(1, 1), mydim, rzero, N(1, 1), mydim)
875 58050 : DO idim = 1, mydim
876 412184 : DO j = 1, mydim
877 405096 : H_approx(idim, j, icol_local) = N(idim, j)
878 : END DO
879 : END DO
880 : END DO
881 : ! H_approx is exp(H_approx) right now, calculate new MOs and check for convergence
882 1630 : conv_norm = 0.0_dp
883 8718 : results = 0.0_dp
884 8718 : DO icol_local = 1, ncol_local
885 7088 : results(icol_local) = last_norm(icol_local)*H_approx(i - 1, 1, icol_local)
886 8718 : conv_norm = MAX(conv_norm, ABS(results(icol_local)))
887 : END DO
888 :
889 1630 : CALL para_env%max(conv_norm)
890 :
891 1630 : IF (conv_norm .LT. eps_exp .OR. i .GT. narnoldi) THEN
892 :
893 30768 : mos_new(1)%local_data = rzero
894 30768 : mos_new(2)%local_data = rzero
895 4006 : DO icol_local = 1, ncol_local
896 29044 : DO idim = 1, mydim
897 25038 : prefac = H_approx(idim, 1, icol_local)*norm1(icol_local)
898 : mos_new(1)%local_data(:, icol_local) = mos_new(1)%local_data(:, icol_local) + &
899 237900 : V_mats(idim)%local_data(:, icol_local)*prefac
900 : mos_new(2)%local_data(:, icol_local) = mos_new(2)%local_data(:, icol_local) + &
901 241162 : V_mats(idim)%local_data(:, icol_local + ncol_local)*prefac
902 : END DO
903 : END DO
904 :
905 744 : IF (PRESENT(mos_next)) THEN
906 4006 : DO icol_local = 1, ncol_local
907 28300 : DO idim = 1, mydim
908 223010 : DO j = 1, mydim
909 219748 : N(idim, j) = H_approx(idim, j, icol_local)
910 : END DO
911 : END DO
912 3262 : CALL dgemm("N", 'N', mydim, mydim, mydim, rone, N(1, 1), mydim, N(1, 1), mydim, rzero, mat1(1, 1), mydim)
913 29044 : DO idim = 1, mydim
914 223010 : DO j = 1, mydim
915 219748 : H_approx(idim, j, icol_local) = mat1(idim, j)
916 : END DO
917 : END DO
918 : END DO
919 30768 : mos_next(1)%local_data = rzero
920 30768 : mos_next(2)%local_data = rzero
921 4006 : DO icol_local = 1, ncol_local
922 29044 : DO idim = 1, mydim
923 25038 : prefac = H_approx(idim, 1, icol_local)*norm1(icol_local)
924 : mos_next(1)%local_data(:, icol_local) = &
925 : mos_next(1)%local_data(:, icol_local) + &
926 237900 : V_mats(idim)%local_data(:, icol_local)*prefac
927 : mos_next(2)%local_data(:, icol_local) = &
928 : mos_next(2)%local_data(:, icol_local) + &
929 241162 : V_mats(idim)%local_data(:, icol_local + ncol_local)*prefac
930 : END DO
931 : END DO
932 : END IF
933 744 : IF (conv_norm .LT. eps_exp) THEN
934 744 : convergence = .TRUE.
935 744 : narn_old = i - 1
936 : END IF
937 : END IF
938 :
939 1630 : DEALLOCATE (ipivot)
940 1630 : DEALLOCATE (mat1)
941 1630 : DEALLOCATE (mat2)
942 1630 : DEALLOCATE (mat3)
943 1630 : DEALLOCATE (N)
944 1630 : DEALLOCATE (D)
945 : END IF
946 1630 : IF (convergence) EXIT
947 :
948 : END DO
949 744 : CPWARN_IF(.NOT. convergence, "ARNOLDI method did not converge")
950 : !deallocate all work matrices
951 :
952 744 : CALL cp_fm_release(V_mats)
953 744 : CALL cp_fm_struct_release(newstruct)
954 744 : CALL col_group%free()
955 :
956 744 : DEALLOCATE (H_approx)
957 744 : DEALLOCATE (H_approx_save)
958 744 : DEALLOCATE (results)
959 744 : DEALLOCATE (norm1)
960 744 : DEALLOCATE (last_norm)
961 744 : CALL timestop(handle)
962 2232 : END SUBROUTINE arnoldi
963 :
964 : END MODULE matrix_exp
|