Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Auxiliary routines for GW + Bethe-Salpeter for computing electronic excitations
10 : !> \par History
11 : !> 11.2023 created [Maximilian Graml]
12 : ! **************************************************************************************************
13 : MODULE bse_util
14 : USE atomic_kind_types, ONLY: atomic_kind_type
15 : USE cell_types, ONLY: cell_type
16 : USE cp_blacs_env, ONLY: cp_blacs_env_type
17 : USE cp_control_types, ONLY: dft_control_type
18 : USE cp_dbcsr_api, ONLY: dbcsr_create,&
19 : dbcsr_init_p,&
20 : dbcsr_p_type,&
21 : dbcsr_set,&
22 : dbcsr_type_symmetric
23 : USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_alloc_block_from_nbl
24 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_sm_fm_multiply,&
25 : dbcsr_allocate_matrix_set,&
26 : dbcsr_deallocate_matrix_set
27 : USE cp_fm_basic_linalg, ONLY: cp_fm_trace,&
28 : cp_fm_uplo_to_full
29 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose,&
30 : cp_fm_cholesky_invert
31 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
32 : cp_fm_struct_release,&
33 : cp_fm_struct_type
34 : USE cp_fm_types, ONLY: cp_fm_create,&
35 : cp_fm_get_info,&
36 : cp_fm_release,&
37 : cp_fm_set_all,&
38 : cp_fm_to_fm_submat,&
39 : cp_fm_to_fm_submat_general,&
40 : cp_fm_type
41 : USE cp_log_handling, ONLY: cp_get_default_logger,&
42 : cp_logger_type
43 : USE cp_output_handling, ONLY: cp_print_key_finished_output,&
44 : cp_print_key_unit_nr
45 : USE cp_realspace_grid_cube, ONLY: cp_pw_to_cube
46 : USE input_constants, ONLY: bse_screening_alpha,&
47 : bse_screening_rpa,&
48 : bse_screening_tdhf,&
49 : use_mom_ref_coac
50 : USE input_section_types, ONLY: section_vals_type
51 : USE kinds, ONLY: default_path_length,&
52 : dp,&
53 : int_8
54 : USE message_passing, ONLY: mp_para_env_type,&
55 : mp_request_type
56 : USE moments_utils, ONLY: get_reference_point
57 : USE mp2_types, ONLY: integ_mat_buffer_type,&
58 : mp2_type
59 : USE parallel_gemm_api, ONLY: parallel_gemm
60 : USE particle_list_types, ONLY: particle_list_type
61 : USE particle_types, ONLY: particle_type
62 : USE physcon, ONLY: evolt
63 : USE pw_env_types, ONLY: pw_env_get,&
64 : pw_env_type
65 : USE pw_poisson_types, ONLY: pw_poisson_type
66 : USE pw_pool_types, ONLY: pw_pool_p_type,&
67 : pw_pool_type
68 : USE pw_types, ONLY: pw_c1d_gs_type,&
69 : pw_r3d_rs_type
70 : USE qs_collocate_density, ONLY: calculate_wavefunction
71 : USE qs_environment_types, ONLY: get_qs_env,&
72 : qs_environment_type
73 : USE qs_kind_types, ONLY: qs_kind_type
74 : USE qs_mo_types, ONLY: get_mo_set,&
75 : mo_set_type
76 : USE qs_moments, ONLY: build_local_moment_matrix
77 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type
78 : USE qs_subsys_types, ONLY: qs_subsys_get,&
79 : qs_subsys_type
80 : USE rpa_communication, ONLY: communicate_buffer
81 : USE util, ONLY: sort,&
82 : sort_unique
83 : #include "./base/base_uses.f90"
84 :
85 : IMPLICIT NONE
86 :
87 : PRIVATE
88 :
89 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'bse_util'
90 :
91 : PUBLIC :: mult_B_with_W, fm_general_add_bse, truncate_fm, &
92 : deallocate_matrices_bse, comp_eigvec_coeff_BSE, sort_excitations, &
93 : estimate_BSE_resources, filter_eigvec_contrib, truncate_BSE_matrices, &
94 : determine_cutoff_indices, adapt_BSE_input_params, get_multipoles_mo, &
95 : reshuffle_eigvec, print_bse_nto_cubes, trace_exciton_descr
96 :
97 : CONTAINS
98 :
99 : ! **************************************************************************************************
100 : !> \brief Multiplies B-matrix (RI-3c-Integrals) with W (screening) to obtain \bar{B}
101 : !> \param fm_mat_S_ij_bse ...
102 : !> \param fm_mat_S_ia_bse ...
103 : !> \param fm_mat_S_bar_ia_bse ...
104 : !> \param fm_mat_S_bar_ij_bse ...
105 : !> \param fm_mat_Q_static_bse_gemm ...
106 : !> \param dimen_RI ...
107 : !> \param homo ...
108 : !> \param virtual ...
109 : ! **************************************************************************************************
110 252 : SUBROUTINE mult_B_with_W(fm_mat_S_ij_bse, fm_mat_S_ia_bse, fm_mat_S_bar_ia_bse, &
111 : fm_mat_S_bar_ij_bse, fm_mat_Q_static_bse_gemm, &
112 : dimen_RI, homo, virtual)
113 :
114 : TYPE(cp_fm_type), INTENT(IN) :: fm_mat_S_ij_bse, fm_mat_S_ia_bse
115 : TYPE(cp_fm_type), INTENT(OUT) :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse
116 : TYPE(cp_fm_type), INTENT(IN) :: fm_mat_Q_static_bse_gemm
117 : INTEGER, INTENT(IN) :: dimen_RI, homo, virtual
118 :
119 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mult_B_with_W'
120 :
121 : INTEGER :: handle, i_global, iiB, info_chol, &
122 : j_global, jjB, ncol_local, nrow_local
123 42 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
124 : TYPE(cp_fm_type) :: fm_work
125 :
126 42 : CALL timeset(routineN, handle)
127 :
128 42 : CALL cp_fm_create(fm_mat_S_bar_ia_bse, fm_mat_S_ia_bse%matrix_struct)
129 42 : CALL cp_fm_set_all(fm_mat_S_bar_ia_bse, 0.0_dp)
130 :
131 42 : CALL cp_fm_create(fm_mat_S_bar_ij_bse, fm_mat_S_ij_bse%matrix_struct)
132 42 : CALL cp_fm_set_all(fm_mat_S_bar_ij_bse, 0.0_dp)
133 :
134 42 : CALL cp_fm_create(fm_work, fm_mat_Q_static_bse_gemm%matrix_struct)
135 42 : CALL cp_fm_set_all(fm_work, 0.0_dp)
136 :
137 : ! get info of fm_mat_Q_static_bse and compute ((1+Q(0))^-1-1)
138 : CALL cp_fm_get_info(matrix=fm_mat_Q_static_bse_gemm, &
139 : nrow_local=nrow_local, &
140 : ncol_local=ncol_local, &
141 : row_indices=row_indices, &
142 42 : col_indices=col_indices)
143 :
144 3528 : DO jjB = 1, ncol_local
145 3486 : j_global = col_indices(jjB)
146 148197 : DO iiB = 1, nrow_local
147 144669 : i_global = row_indices(iiB)
148 148155 : IF (j_global == i_global .AND. i_global <= dimen_RI) THEN
149 1743 : fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) = fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) + 1.0_dp
150 : END IF
151 : END DO
152 : END DO
153 :
154 : ! calculate Trace(Log(Matrix)) as Log(DET(Matrix)) via cholesky decomposition
155 42 : CALL cp_fm_cholesky_decompose(matrix=fm_mat_Q_static_bse_gemm, n=dimen_RI, info_out=info_chol)
156 :
157 42 : IF (info_chol /= 0) THEN
158 0 : CALL cp_abort(__LOCATION__, 'Cholesky decomposition failed for static polarization in BSE')
159 : END IF
160 :
161 : ! calculate [1+Q(i0)]^-1
162 42 : CALL cp_fm_cholesky_invert(fm_mat_Q_static_bse_gemm)
163 :
164 : ! symmetrize the result
165 42 : CALL cp_fm_uplo_to_full(fm_mat_Q_static_bse_gemm, fm_work)
166 :
167 : CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo**2, k=dimen_RI, alpha=1.0_dp, &
168 : matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ij_bse, beta=0.0_dp, &
169 42 : matrix_c=fm_mat_S_bar_ij_bse)
170 :
171 : ! fm_mat_S_bar_ia_bse has a different blacs_env as fm_mat_S_ij_bse since we take
172 : ! fm_mat_S_ia_bse from RPA. Therefore, we also need a different fm_mat_Q_static_bse_gemm
173 : CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo*virtual, k=dimen_RI, alpha=1.0_dp, &
174 : matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ia_bse, beta=0.0_dp, &
175 42 : matrix_c=fm_mat_S_bar_ia_bse)
176 :
177 42 : CALL cp_fm_release(fm_work)
178 :
179 42 : CALL timestop(handle)
180 :
181 42 : END SUBROUTINE
182 :
183 : ! **************************************************************************************************
184 : !> \brief Adds and reorders full matrices with a combined index structure, e.g. adding W_ij,ab
185 : !> to A_ia, jb which needs MPI communication.
186 : !> \param fm_out ...
187 : !> \param fm_in ...
188 : !> \param beta ...
189 : !> \param nrow_secidx_in ...
190 : !> \param ncol_secidx_in ...
191 : !> \param nrow_secidx_out ...
192 : !> \param ncol_secidx_out ...
193 : !> \param unit_nr ...
194 : !> \param reordering ...
195 : !> \param mp2_env ...
196 : ! **************************************************************************************************
197 490 : SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_in, &
198 : nrow_secidx_out, ncol_secidx_out, unit_nr, reordering, mp2_env)
199 :
200 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_out
201 : TYPE(cp_fm_type), INTENT(IN) :: fm_in
202 : REAL(kind=dp) :: beta
203 : INTEGER, INTENT(IN) :: nrow_secidx_in, ncol_secidx_in, &
204 : nrow_secidx_out, ncol_secidx_out
205 : INTEGER :: unit_nr
206 : INTEGER, DIMENSION(4) :: reordering
207 : TYPE(mp2_type), INTENT(IN) :: mp2_env
208 :
209 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fm_general_add_bse'
210 :
211 : INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_out, idx_row_out, ii, &
212 : iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, nprocs, &
213 : nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, row_idx_loc, &
214 : send_pcol, send_prow
215 490 : INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, num_entries_rec, &
216 : num_entries_send
217 : INTEGER, DIMENSION(4) :: indices_in
218 490 : INTEGER, DIMENSION(:), POINTER :: col_indices_in, col_indices_out, &
219 490 : row_indices_in, row_indices_out
220 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
221 490 : DIMENSION(:) :: buffer_rec, buffer_send
222 : TYPE(mp_para_env_type), POINTER :: para_env_out
223 490 : TYPE(mp_request_type), DIMENSION(:, :), POINTER :: req_array
224 :
225 490 : CALL timeset(routineN, handle)
226 490 : CALL timeset(routineN//"_1_setup", handle2)
227 :
228 490 : para_env_out => fm_out%matrix_struct%para_env
229 : ! A_iajb
230 : ! We start by moving data from local parts of W_ijab to the full matrix A_iajb using buffers
231 : CALL cp_fm_get_info(matrix=fm_out, &
232 : nrow_local=nrow_local_out, &
233 : ncol_local=ncol_local_out, &
234 : row_indices=row_indices_out, &
235 : col_indices=col_indices_out, &
236 : nrow_block=nrow_block_out, &
237 490 : ncol_block=ncol_block_out)
238 :
239 1470 : ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
240 1470 : ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
241 :
242 1470 : num_entries_rec(:) = 0
243 1470 : num_entries_send(:) = 0
244 :
245 490 : dummy = 0
246 :
247 : CALL cp_fm_get_info(matrix=fm_in, &
248 : nrow_local=nrow_local_in, &
249 : ncol_local=ncol_local_in, &
250 : row_indices=row_indices_in, &
251 : col_indices=col_indices_in, &
252 : nrow_block=nrow_block_in, &
253 490 : ncol_block=ncol_block_in)
254 :
255 490 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
256 94 : WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
257 188 : fm_out%matrix_struct%nrow_global
258 94 : WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
259 188 : fm_out%matrix_struct%ncol_global
260 :
261 94 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
262 94 : WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
263 :
264 94 : WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
265 188 : fm_in%matrix_struct%nrow_global
266 94 : WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
267 188 : fm_in%matrix_struct%ncol_global
268 :
269 94 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
270 94 : WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
271 : END IF
272 :
273 : ! Use scalapack wrapper to find process index in fm_out
274 : ! To that end, we obtain the global index in fm_out from the level indices
275 490 : indices_in(:) = 0
276 9342 : DO row_idx_loc = 1, nrow_local_in
277 8852 : indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
278 8852 : indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
279 93294 : DO col_idx_loc = 1, ncol_local_in
280 83952 : indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
281 83952 : indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
282 :
283 83952 : idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
284 83952 : idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
285 :
286 83952 : send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
287 83952 : send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
288 :
289 83952 : proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
290 :
291 92804 : num_entries_send(proc_send) = num_entries_send(proc_send) + 1
292 :
293 : END DO
294 : END DO
295 :
296 490 : CALL timestop(handle2)
297 :
298 490 : CALL timeset(routineN//"_2_comm_entry_nums", handle2)
299 490 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
300 94 : WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
301 : END IF
302 :
303 490 : CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
304 :
305 490 : CALL timestop(handle2)
306 :
307 490 : CALL timeset(routineN//"_3_alloc_buffer", handle2)
308 490 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
309 94 : WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
310 : END IF
311 :
312 : ! Buffers for entries and their indices
313 2450 : ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
314 2450 : ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
315 :
316 : ! allocate data message and corresponding indices
317 1470 : DO iproc = 0, para_env_out%num_pe - 1
318 :
319 2750 : ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
320 85422 : buffer_rec(iproc)%msg = 0.0_dp
321 :
322 : END DO
323 :
324 1470 : DO iproc = 0, para_env_out%num_pe - 1
325 :
326 2750 : ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
327 85422 : buffer_send(iproc)%msg = 0.0_dp
328 :
329 : END DO
330 :
331 1470 : DO iproc = 0, para_env_out%num_pe - 1
332 :
333 2750 : ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
334 171334 : buffer_rec(iproc)%indx = 0
335 :
336 : END DO
337 :
338 1470 : DO iproc = 0, para_env_out%num_pe - 1
339 :
340 2750 : ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
341 171334 : buffer_send(iproc)%indx = 0
342 :
343 : END DO
344 :
345 490 : CALL timestop(handle2)
346 :
347 490 : CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
348 490 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
349 94 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
350 : END IF
351 :
352 1470 : ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
353 1470 : entry_counter(:) = 0
354 :
355 : ! Now we can write the actual data and indices to the send-buffer
356 9342 : DO row_idx_loc = 1, nrow_local_in
357 8852 : indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
358 8852 : indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
359 93294 : DO col_idx_loc = 1, ncol_local_in
360 83952 : indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
361 83952 : indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
362 :
363 83952 : idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
364 83952 : idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
365 :
366 83952 : send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
367 83952 : send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
368 :
369 83952 : proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
370 83952 : entry_counter(proc_send) = entry_counter(proc_send) + 1
371 :
372 : buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
373 83952 : fm_in%local_data(row_idx_loc, col_idx_loc)
374 :
375 83952 : buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_out
376 92804 : buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
377 :
378 : END DO
379 : END DO
380 :
381 7350 : ALLOCATE (req_array(1:para_env_out%num_pe, 4))
382 :
383 490 : CALL timestop(handle2)
384 :
385 490 : CALL timeset(routineN//"_5_comm_buffer", handle2)
386 490 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
387 94 : WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
388 : END IF
389 :
390 : ! communicate the buffer
391 : CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
392 490 : buffer_send, req_array)
393 :
394 490 : CALL timestop(handle2)
395 :
396 490 : CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
397 490 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
398 94 : WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
399 : END IF
400 :
401 : ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
402 490 : nprocs = para_env_out%num_pe
403 :
404 : !$OMP PARALLEL DO DEFAULT(NONE) &
405 : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec, beta) &
406 490 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
407 : DO iproc = 0, nprocs - 1
408 : DO i_entry_rec = 1, num_entries_rec(iproc)
409 : ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
410 : jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
411 :
412 : fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + beta*buffer_rec(iproc)%msg(i_entry_rec)
413 : END DO
414 : END DO
415 : !$OMP END PARALLEL DO
416 :
417 490 : CALL timestop(handle2)
418 :
419 490 : CALL timeset(routineN//"_7_cleanup", handle2)
420 490 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
421 94 : WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
422 : END IF
423 :
424 : !Clean up all the arrays from the communication process
425 1470 : DO iproc = 0, para_env_out%num_pe - 1
426 980 : DEALLOCATE (buffer_rec(iproc)%msg)
427 980 : DEALLOCATE (buffer_rec(iproc)%indx)
428 980 : DEALLOCATE (buffer_send(iproc)%msg)
429 1470 : DEALLOCATE (buffer_send(iproc)%indx)
430 : END DO
431 2450 : DEALLOCATE (buffer_rec, buffer_send)
432 490 : DEALLOCATE (req_array)
433 490 : DEALLOCATE (entry_counter)
434 490 : DEALLOCATE (num_entries_rec, num_entries_send)
435 :
436 490 : CALL timestop(handle2)
437 490 : CALL timestop(handle)
438 :
439 3430 : END SUBROUTINE fm_general_add_bse
440 :
441 : ! **************************************************************************************************
442 : !> \brief Routine for truncating a full matrix as given by the energy cutoffs in the input file.
443 : !> Logic: Matrices have some dimension dimen_RI x nrow_in*ncol_in for the incoming (untruncated) matrix
444 : !> and dimen_RI x nrow_out*ncol_out for the truncated matrix. The truncation is done by resorting the indices
445 : !> via parallel communication.
446 : !> \param fm_out ...
447 : !> \param fm_in ...
448 : !> \param ncol_in ...
449 : !> \param nrow_out ...
450 : !> \param ncol_out ...
451 : !> \param unit_nr ...
452 : !> \param mp2_env ...
453 : !> \param nrow_offset ...
454 : !> \param ncol_offset ...
455 : ! **************************************************************************************************
456 126 : SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, &
457 : nrow_out, ncol_out, unit_nr, mp2_env, &
458 : nrow_offset, ncol_offset)
459 :
460 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_out
461 : TYPE(cp_fm_type), INTENT(IN) :: fm_in
462 : INTEGER :: ncol_in, nrow_out, ncol_out, unit_nr
463 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
464 : INTEGER, INTENT(IN), OPTIONAL :: nrow_offset, ncol_offset
465 :
466 : CHARACTER(LEN=*), PARAMETER :: routineN = 'truncate_fm'
467 :
468 : INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_first, idx_col_in, &
469 : idx_col_out, idx_col_sec, idx_row_in, ii, iproc, jj, ncol_block_in, ncol_block_out, &
470 : ncol_local_in, ncol_local_out, nprocs, nrow_block_in, nrow_block_out, nrow_local_in, &
471 : nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow
472 126 : INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, num_entries_rec, &
473 : num_entries_send
474 126 : INTEGER, DIMENSION(:), POINTER :: col_indices_in, col_indices_out, &
475 126 : row_indices_in, row_indices_out
476 : LOGICAL :: correct_ncol, correct_nrow
477 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
478 126 : DIMENSION(:) :: buffer_rec, buffer_send
479 : TYPE(mp_para_env_type), POINTER :: para_env_out
480 126 : TYPE(mp_request_type), DIMENSION(:, :), POINTER :: req_array
481 :
482 126 : CALL timeset(routineN, handle)
483 126 : CALL timeset(routineN//"_1_setup", handle2)
484 :
485 126 : correct_nrow = .FALSE.
486 126 : correct_ncol = .FALSE.
487 : !In case of truncation in the occupied space, we need to correct the interval of indices
488 126 : IF (PRESENT(nrow_offset)) THEN
489 84 : correct_nrow = .TRUE.
490 : END IF
491 126 : IF (PRESENT(ncol_offset)) THEN
492 42 : correct_ncol = .TRUE.
493 : END IF
494 :
495 126 : para_env_out => fm_out%matrix_struct%para_env
496 :
497 : CALL cp_fm_get_info(matrix=fm_out, &
498 : nrow_local=nrow_local_out, &
499 : ncol_local=ncol_local_out, &
500 : row_indices=row_indices_out, &
501 : col_indices=col_indices_out, &
502 : nrow_block=nrow_block_out, &
503 126 : ncol_block=ncol_block_out)
504 :
505 378 : ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
506 378 : ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
507 :
508 378 : num_entries_rec(:) = 0
509 378 : num_entries_send(:) = 0
510 :
511 126 : dummy = 0
512 :
513 : CALL cp_fm_get_info(matrix=fm_in, &
514 : nrow_local=nrow_local_in, &
515 : ncol_local=ncol_local_in, &
516 : row_indices=row_indices_in, &
517 : col_indices=col_indices_in, &
518 : nrow_block=nrow_block_in, &
519 126 : ncol_block=ncol_block_in)
520 :
521 126 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
522 12 : WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
523 24 : fm_out%matrix_struct%nrow_global
524 12 : WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
525 24 : fm_out%matrix_struct%ncol_global
526 :
527 12 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
528 12 : WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
529 :
530 12 : WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
531 24 : fm_in%matrix_struct%nrow_global
532 12 : WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
533 24 : fm_in%matrix_struct%ncol_global
534 :
535 12 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
536 12 : WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
537 : END IF
538 :
539 : ! We find global indices in S with nrow_in and ncol_in for truncation
540 10038 : DO col_idx_loc = 1, ncol_local_in
541 9912 : idx_col_in = col_indices_in(col_idx_loc)
542 :
543 9912 : idx_col_first = (idx_col_in - 1)/ncol_in + 1
544 9912 : idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
545 :
546 : ! If occupied orbitals are included, these have to be handled differently
547 : ! due to their reversed indexing
548 9912 : IF (correct_nrow) THEN
549 3864 : idx_col_first = idx_col_first - nrow_offset + 1
550 3864 : IF (idx_col_first .LE. 0) CYCLE
551 : ELSE
552 6048 : IF (idx_col_first > nrow_out) EXIT
553 : END IF
554 9912 : IF (correct_ncol) THEN
555 672 : idx_col_sec = idx_col_sec - ncol_offset + 1
556 672 : IF (idx_col_sec .LE. 0) CYCLE
557 : ELSE
558 9240 : IF (idx_col_sec > ncol_out) CYCLE
559 : END IF
560 :
561 8736 : idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
562 :
563 371406 : DO row_idx_loc = 1, nrow_local_in
564 362544 : idx_row_in = row_indices_in(row_idx_loc)
565 :
566 362544 : send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
567 362544 : send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
568 :
569 362544 : proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
570 :
571 372456 : num_entries_send(proc_send) = num_entries_send(proc_send) + 1
572 :
573 : END DO
574 : END DO
575 :
576 126 : CALL timestop(handle2)
577 :
578 126 : CALL timeset(routineN//"_2_comm_entry_nums", handle2)
579 126 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
580 12 : WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
581 : END IF
582 :
583 126 : CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
584 :
585 126 : CALL timestop(handle2)
586 :
587 126 : CALL timeset(routineN//"_3_alloc_buffer", handle2)
588 126 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
589 12 : WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
590 : END IF
591 :
592 : ! Buffers for entries and their indices
593 630 : ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
594 630 : ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
595 :
596 : ! allocate data message and corresponding indices
597 378 : DO iproc = 0, para_env_out%num_pe - 1
598 :
599 672 : ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
600 362922 : buffer_rec(iproc)%msg = 0.0_dp
601 :
602 : END DO
603 :
604 378 : DO iproc = 0, para_env_out%num_pe - 1
605 :
606 672 : ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
607 362922 : buffer_send(iproc)%msg = 0.0_dp
608 :
609 : END DO
610 :
611 378 : DO iproc = 0, para_env_out%num_pe - 1
612 :
613 672 : ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
614 725970 : buffer_rec(iproc)%indx = 0
615 :
616 : END DO
617 :
618 378 : DO iproc = 0, para_env_out%num_pe - 1
619 :
620 672 : ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
621 725970 : buffer_send(iproc)%indx = 0
622 :
623 : END DO
624 :
625 126 : CALL timestop(handle2)
626 :
627 126 : CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
628 126 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
629 12 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
630 : END IF
631 :
632 378 : ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
633 378 : entry_counter(:) = 0
634 :
635 : ! Now we can write the actual data and indices to the send-buffer
636 10038 : DO col_idx_loc = 1, ncol_local_in
637 9912 : idx_col_in = col_indices_in(col_idx_loc)
638 :
639 9912 : idx_col_first = (idx_col_in - 1)/ncol_in + 1
640 9912 : idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
641 :
642 : ! If occupied orbitals are included, these have to be handled differently
643 : ! due to their reversed indexing
644 9912 : IF (correct_nrow) THEN
645 3864 : idx_col_first = idx_col_first - nrow_offset + 1
646 3864 : IF (idx_col_first .LE. 0) CYCLE
647 : ELSE
648 6048 : IF (idx_col_first > nrow_out) EXIT
649 : END IF
650 9912 : IF (correct_ncol) THEN
651 672 : idx_col_sec = idx_col_sec - ncol_offset + 1
652 672 : IF (idx_col_sec .LE. 0) CYCLE
653 : ELSE
654 9240 : IF (idx_col_sec > ncol_out) CYCLE
655 : END IF
656 :
657 8736 : idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
658 :
659 371406 : DO row_idx_loc = 1, nrow_local_in
660 362544 : idx_row_in = row_indices_in(row_idx_loc)
661 :
662 362544 : send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
663 :
664 362544 : send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
665 :
666 362544 : proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
667 362544 : entry_counter(proc_send) = entry_counter(proc_send) + 1
668 :
669 : buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
670 362544 : fm_in%local_data(row_idx_loc, col_idx_loc)
671 : !No need to create row_out, since it is identical to incoming
672 : !We dont change the RI index for any fm_mat_XX_BSE
673 362544 : buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_in
674 372456 : buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
675 :
676 : END DO
677 : END DO
678 :
679 1890 : ALLOCATE (req_array(1:para_env_out%num_pe, 4))
680 :
681 126 : CALL timestop(handle2)
682 :
683 126 : CALL timeset(routineN//"_5_comm_buffer", handle2)
684 126 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
685 12 : WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
686 : END IF
687 :
688 : ! communicate the buffer
689 : CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
690 126 : buffer_send, req_array)
691 :
692 126 : CALL timestop(handle2)
693 :
694 126 : CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
695 126 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
696 12 : WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
697 : END IF
698 :
699 : ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
700 126 : nprocs = para_env_out%num_pe
701 :
702 : !$OMP PARALLEL DO DEFAULT(NONE) &
703 : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec) &
704 126 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
705 : DO iproc = 0, nprocs - 1
706 : DO i_entry_rec = 1, num_entries_rec(iproc)
707 : ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
708 : jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
709 :
710 : fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + buffer_rec(iproc)%msg(i_entry_rec)
711 : END DO
712 : END DO
713 : !$OMP END PARALLEL DO
714 :
715 126 : CALL timestop(handle2)
716 :
717 126 : CALL timeset(routineN//"_7_cleanup", handle2)
718 126 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
719 12 : WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
720 : END IF
721 :
722 : !Clean up all the arrays from the communication process
723 378 : DO iproc = 0, para_env_out%num_pe - 1
724 252 : DEALLOCATE (buffer_rec(iproc)%msg)
725 252 : DEALLOCATE (buffer_rec(iproc)%indx)
726 252 : DEALLOCATE (buffer_send(iproc)%msg)
727 378 : DEALLOCATE (buffer_send(iproc)%indx)
728 : END DO
729 630 : DEALLOCATE (buffer_rec, buffer_send)
730 126 : DEALLOCATE (req_array)
731 126 : DEALLOCATE (entry_counter)
732 126 : DEALLOCATE (num_entries_rec, num_entries_send)
733 :
734 126 : CALL timestop(handle2)
735 126 : CALL timestop(handle)
736 :
737 1008 : END SUBROUTINE truncate_fm
738 :
739 : ! **************************************************************************************************
740 : !> \brief ...
741 : !> \param fm_mat_S_bar_ia_bse ...
742 : !> \param fm_mat_S_bar_ij_bse ...
743 : !> \param fm_mat_S_trunc ...
744 : !> \param fm_mat_S_ij_trunc ...
745 : !> \param fm_mat_S_ab_trunc ...
746 : !> \param fm_mat_Q_static_bse_gemm ...
747 : !> \param mp2_env ...
748 : ! **************************************************************************************************
749 42 : SUBROUTINE deallocate_matrices_bse(fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, &
750 : fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
751 : fm_mat_Q_static_bse_gemm, mp2_env)
752 :
753 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, fm_mat_S_trunc, &
754 : fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, fm_mat_Q_static_bse_gemm
755 : TYPE(mp2_type) :: mp2_env
756 :
757 : CHARACTER(LEN=*), PARAMETER :: routineN = 'deallocate_matrices_bse'
758 :
759 : INTEGER :: handle
760 :
761 42 : CALL timeset(routineN, handle)
762 :
763 42 : CALL cp_fm_release(fm_mat_S_bar_ia_bse)
764 42 : CALL cp_fm_release(fm_mat_S_bar_ij_bse)
765 42 : CALL cp_fm_release(fm_mat_S_trunc)
766 42 : CALL cp_fm_release(fm_mat_S_ij_trunc)
767 42 : CALL cp_fm_release(fm_mat_S_ab_trunc)
768 42 : CALL cp_fm_release(fm_mat_Q_static_bse_gemm)
769 42 : IF (mp2_env%bse%do_nto_analysis) THEN
770 4 : DEALLOCATE (mp2_env%bse%bse_nto_state_list_final)
771 : END IF
772 :
773 42 : CALL timestop(handle)
774 :
775 42 : END SUBROUTINE deallocate_matrices_bse
776 :
777 : ! **************************************************************************************************
778 : !> \brief Routine for computing the coefficients of the eigenvectors of the BSE matrix from a
779 : !> multiplication with the eigenvalues
780 : !> \param fm_work ...
781 : !> \param eig_vals ...
782 : !> \param beta ...
783 : !> \param gamma ...
784 : !> \param do_transpose ...
785 : ! **************************************************************************************************
786 104 : SUBROUTINE comp_eigvec_coeff_BSE(fm_work, eig_vals, beta, gamma, do_transpose)
787 :
788 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_work
789 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
790 : INTENT(IN) :: eig_vals
791 : REAL(KIND=dp), INTENT(IN) :: beta
792 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: gamma
793 : LOGICAL, INTENT(IN), OPTIONAL :: do_transpose
794 :
795 : CHARACTER(LEN=*), PARAMETER :: routineN = 'comp_eigvec_coeff_BSE'
796 :
797 : INTEGER :: handle, i_row_global, ii, j_col_global, &
798 : jj, ncol_local, nrow_local
799 52 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
800 : LOGICAL :: my_do_transpose
801 : REAL(KIND=dp) :: coeff, my_gamma
802 :
803 52 : CALL timeset(routineN, handle)
804 :
805 52 : IF (PRESENT(gamma)) THEN
806 52 : my_gamma = gamma
807 : ELSE
808 : my_gamma = 2.0_dp
809 : END IF
810 :
811 52 : IF (PRESENT(do_transpose)) THEN
812 52 : my_do_transpose = do_transpose
813 : ELSE
814 : my_do_transpose = .FALSE.
815 : END IF
816 :
817 : CALL cp_fm_get_info(matrix=fm_work, &
818 : nrow_local=nrow_local, &
819 : ncol_local=ncol_local, &
820 : row_indices=row_indices, &
821 52 : col_indices=col_indices)
822 :
823 52 : IF (my_do_transpose) THEN
824 2548 : DO jj = 1, ncol_local
825 2496 : j_col_global = col_indices(jj)
826 62452 : DO ii = 1, nrow_local
827 59904 : coeff = (eig_vals(j_col_global)**beta)/my_gamma
828 62400 : fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
829 : END DO
830 : END DO
831 : ELSE
832 0 : DO jj = 1, ncol_local
833 0 : DO ii = 1, nrow_local
834 0 : i_row_global = row_indices(ii)
835 0 : coeff = (eig_vals(i_row_global)**beta)/my_gamma
836 0 : fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
837 : END DO
838 : END DO
839 : END IF
840 :
841 52 : CALL timestop(handle)
842 :
843 52 : END SUBROUTINE
844 :
845 : ! **************************************************************************************************
846 : !> \brief ...
847 : !> \param idx_prim ...
848 : !> \param idx_sec ...
849 : !> \param eigvec_entries ...
850 : ! **************************************************************************************************
851 1700 : SUBROUTINE sort_excitations(idx_prim, idx_sec, eigvec_entries)
852 :
853 : INTEGER, ALLOCATABLE, DIMENSION(:) :: idx_prim, idx_sec
854 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvec_entries
855 :
856 : CHARACTER(LEN=*), PARAMETER :: routineN = 'sort_excitations'
857 :
858 : INTEGER :: handle, ii, kk, num_entries, num_mults
859 1700 : INTEGER, ALLOCATABLE, DIMENSION(:) :: idx_prim_work, idx_sec_work, tmp_index
860 : LOGICAL :: unique_entries
861 1700 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvec_entries_work
862 :
863 1700 : CALL timeset(routineN, handle)
864 :
865 1700 : num_entries = SIZE(idx_prim)
866 :
867 4450 : ALLOCATE (tmp_index(num_entries))
868 :
869 1700 : CALL sort(idx_prim, num_entries, tmp_index)
870 :
871 2750 : ALLOCATE (idx_sec_work(num_entries))
872 4450 : ALLOCATE (eigvec_entries_work(num_entries))
873 :
874 4274 : DO ii = 1, num_entries
875 2574 : idx_sec_work(ii) = idx_sec(tmp_index(ii))
876 4274 : eigvec_entries_work(ii) = eigvec_entries(tmp_index(ii))
877 : END DO
878 :
879 1700 : DEALLOCATE (tmp_index)
880 1700 : DEALLOCATE (idx_sec)
881 1700 : DEALLOCATE (eigvec_entries)
882 :
883 1700 : CALL MOVE_ALLOC(idx_sec_work, idx_sec)
884 1700 : CALL MOVE_ALLOC(eigvec_entries_work, eigvec_entries)
885 :
886 : !Now check for multiple entries in first idx to check necessity of sorting in second idx
887 1700 : CALL sort_unique(idx_prim, unique_entries)
888 1700 : IF (.NOT. unique_entries) THEN
889 508 : ALLOCATE (idx_prim_work(num_entries))
890 1452 : idx_prim_work(:) = idx_prim(:)
891 : ! Find duplicate entries in idx_prim
892 1452 : DO ii = 1, num_entries
893 1198 : IF (idx_prim_work(ii) == 0) CYCLE
894 4908 : num_mults = COUNT(idx_prim_work == idx_prim_work(ii))
895 804 : IF (num_mults > 1) THEN
896 : !Set all duplicate entries to 0
897 1034 : idx_prim_work(ii:ii + num_mults - 1) = 0
898 : !Start sorting in secondary index
899 960 : ALLOCATE (idx_sec_work(num_mults))
900 960 : ALLOCATE (eigvec_entries_work(num_mults))
901 1034 : idx_sec_work(:) = idx_sec(ii:ii + num_mults - 1)
902 1034 : eigvec_entries_work(:) = eigvec_entries(ii:ii + num_mults - 1)
903 640 : ALLOCATE (tmp_index(num_mults))
904 320 : CALL sort(idx_sec_work, num_mults, tmp_index)
905 :
906 : !Now write newly sorted indices to original arrays
907 1034 : DO kk = ii, ii + num_mults - 1
908 714 : idx_sec(kk) = idx_sec_work(kk - ii + 1)
909 1034 : eigvec_entries(kk) = eigvec_entries_work(tmp_index(kk - ii + 1))
910 : END DO
911 : !Deallocate work arrays
912 320 : DEALLOCATE (tmp_index)
913 320 : DEALLOCATE (idx_sec_work)
914 320 : DEALLOCATE (eigvec_entries_work)
915 : END IF
916 1452 : idx_prim_work(ii) = idx_prim(ii)
917 : END DO
918 254 : DEALLOCATE (idx_prim_work)
919 : END IF
920 :
921 1700 : CALL timestop(handle)
922 :
923 5100 : END SUBROUTINE sort_excitations
924 :
925 : ! **************************************************************************************************
926 : !> \brief Roughly estimates the needed runtime and memory during the BSE run
927 : !> \param homo_red ...
928 : !> \param virtual_red ...
929 : !> \param unit_nr ...
930 : !> \param bse_abba ...
931 : !> \param para_env ...
932 : !> \param diag_runtime_est ...
933 : ! **************************************************************************************************
934 42 : SUBROUTINE estimate_BSE_resources(homo_red, virtual_red, unit_nr, bse_abba, &
935 : para_env, diag_runtime_est)
936 :
937 : INTEGER :: homo_red, virtual_red, unit_nr
938 : LOGICAL :: bse_abba
939 : TYPE(mp_para_env_type), POINTER :: para_env
940 : REAL(KIND=dp) :: diag_runtime_est
941 :
942 : CHARACTER(LEN=*), PARAMETER :: routineN = 'estimate_BSE_resources'
943 :
944 : INTEGER :: handle, num_BSE_matrices
945 : INTEGER(KIND=int_8) :: full_dim
946 : REAL(KIND=dp) :: mem_est, mem_est_per_rank
947 :
948 42 : CALL timeset(routineN, handle)
949 :
950 : ! Number of matrices with size of A in TDA is 2 (A itself and W_ijab)
951 42 : num_BSE_matrices = 2
952 : ! With the full diagonalization of ABBA, we need several auxiliary matrices in the process
953 : ! The maximum number is 2 + 2 + 6 (additional B and C matrix as well as 6 matrices to create C)
954 42 : IF (bse_abba) THEN
955 26 : num_BSE_matrices = 10
956 : END IF
957 :
958 42 : full_dim = (INT(homo_red, KIND=int_8)**2*INT(virtual_red, KIND=int_8)**2)*INT(num_BSE_matrices, KIND=int_8)
959 42 : mem_est = REAL(8*full_dim, KIND=dp)/REAL(1024**3, KIND=dp)
960 42 : mem_est_per_rank = REAL(mem_est/para_env%num_pe, KIND=dp)
961 :
962 42 : IF (unit_nr > 0) THEN
963 : ! WRITE (unit_nr, '(T2,A4,T7,A40,T68,F13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
964 : ! mem_est
965 21 : WRITE (unit_nr, '(T2,A4,T7,A40,T68,ES13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
966 42 : mem_est
967 21 : WRITE (unit_nr, '(T2,A4,T7,A47,T68,F13.3)') 'BSE|', 'Peak memory estimate per MPI rank from BSE [GB]', &
968 42 : mem_est_per_rank
969 21 : WRITE (unit_nr, '(T2,A4)') 'BSE|'
970 : END IF
971 : ! Rough estimation of diagonalization runtimes. Baseline was a full BSE Naphthalene
972 : ! run with 11000x11000 entries in A/B/C, which took 10s on 32 ranks
973 : diag_runtime_est = REAL(INT(homo_red, KIND=int_8)*INT(virtual_red, KIND=int_8)/11000_int_8, KIND=dp)**3* &
974 42 : 10*32/REAL(para_env%num_pe, KIND=dp)
975 :
976 42 : CALL timestop(handle)
977 :
978 42 : END SUBROUTINE estimate_BSE_resources
979 :
980 : ! **************************************************************************************************
981 : !> \brief Filters eigenvector entries above a given threshold to describe excitations in the
982 : !> singleparticle basis
983 : !> \param fm_eigvec ...
984 : !> \param idx_homo ...
985 : !> \param idx_virt ...
986 : !> \param eigvec_entries ...
987 : !> \param i_exc ...
988 : !> \param virtual ...
989 : !> \param num_entries ...
990 : !> \param mp2_env ...
991 : ! **************************************************************************************************
992 1700 : SUBROUTINE filter_eigvec_contrib(fm_eigvec, idx_homo, idx_virt, eigvec_entries, &
993 : i_exc, virtual, num_entries, mp2_env)
994 :
995 : TYPE(cp_fm_type), INTENT(IN) :: fm_eigvec
996 : INTEGER, ALLOCATABLE, DIMENSION(:) :: idx_homo, idx_virt
997 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvec_entries
998 : INTEGER :: i_exc, virtual, num_entries
999 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1000 :
1001 : CHARACTER(LEN=*), PARAMETER :: routineN = 'filter_eigvec_contrib'
1002 :
1003 : INTEGER :: eigvec_idx, handle, ii, iproc, jj, kk, &
1004 : ncol_local, nrow_local, &
1005 : num_entries_local
1006 : INTEGER, ALLOCATABLE, DIMENSION(:) :: num_entries_to_comm
1007 1700 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1008 : REAL(KIND=dp) :: eigvec_entry
1009 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
1010 1700 : DIMENSION(:) :: buffer_entries
1011 : TYPE(mp_para_env_type), POINTER :: para_env
1012 :
1013 1700 : CALL timeset(routineN, handle)
1014 :
1015 1700 : para_env => fm_eigvec%matrix_struct%para_env
1016 :
1017 : CALL cp_fm_get_info(matrix=fm_eigvec, &
1018 : nrow_local=nrow_local, &
1019 : ncol_local=ncol_local, &
1020 : row_indices=row_indices, &
1021 1700 : col_indices=col_indices)
1022 :
1023 5100 : ALLOCATE (num_entries_to_comm(0:para_env%num_pe - 1))
1024 5100 : num_entries_to_comm(:) = 0
1025 :
1026 83300 : DO jj = 1, ncol_local
1027 : !First check if i is localized on this proc
1028 81600 : IF (col_indices(jj) /= i_exc) THEN
1029 : CYCLE
1030 : END IF
1031 44200 : DO ii = 1, nrow_local
1032 40800 : eigvec_idx = row_indices(ii)
1033 40800 : eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
1034 122400 : IF (ABS(eigvec_entry) > mp2_env%bse%eps_x) THEN
1035 1287 : num_entries_to_comm(para_env%mepos) = num_entries_to_comm(para_env%mepos) + 1
1036 : END IF
1037 : END DO
1038 : END DO
1039 :
1040 : !Gather number of entries of other processes
1041 1700 : CALL para_env%sum(num_entries_to_comm)
1042 :
1043 1700 : num_entries_local = num_entries_to_comm(para_env%mepos)
1044 :
1045 8500 : ALLOCATE (buffer_entries(0:para_env%num_pe - 1))
1046 :
1047 5100 : DO iproc = 0, para_env%num_pe - 1
1048 8454 : ALLOCATE (buffer_entries(iproc)%msg(num_entries_to_comm(iproc)))
1049 8454 : ALLOCATE (buffer_entries(iproc)%indx(num_entries_to_comm(iproc), 2))
1050 5974 : buffer_entries(iproc)%msg = 0.0_dp
1051 17048 : buffer_entries(iproc)%indx = 0
1052 : END DO
1053 :
1054 : kk = 1
1055 83300 : DO jj = 1, ncol_local
1056 : !First check if i is localized on this proc
1057 81600 : IF (col_indices(jj) /= i_exc) THEN
1058 : CYCLE
1059 : END IF
1060 44200 : DO ii = 1, nrow_local
1061 40800 : eigvec_idx = row_indices(ii)
1062 40800 : eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
1063 122400 : IF (ABS(eigvec_entry) > mp2_env%bse%eps_x) THEN
1064 1287 : buffer_entries(para_env%mepos)%indx(kk, 1) = (eigvec_idx - 1)/virtual + 1
1065 1287 : buffer_entries(para_env%mepos)%indx(kk, 2) = MOD(eigvec_idx - 1, virtual) + 1
1066 1287 : buffer_entries(para_env%mepos)%msg(kk) = eigvec_entry
1067 1287 : kk = kk + 1
1068 : END IF
1069 : END DO
1070 : END DO
1071 :
1072 5100 : DO iproc = 0, para_env%num_pe - 1
1073 3400 : CALL para_env%sum(buffer_entries(iproc)%msg)
1074 5100 : CALL para_env%sum(buffer_entries(iproc)%indx)
1075 : END DO
1076 :
1077 : !Now sum up gathered information
1078 5100 : num_entries = SUM(num_entries_to_comm)
1079 4450 : ALLOCATE (idx_homo(num_entries))
1080 2750 : ALLOCATE (idx_virt(num_entries))
1081 4450 : ALLOCATE (eigvec_entries(num_entries))
1082 :
1083 1700 : kk = 1
1084 5100 : DO iproc = 0, para_env%num_pe - 1
1085 5100 : IF (num_entries_to_comm(iproc) /= 0) THEN
1086 4228 : DO ii = 1, num_entries_to_comm(iproc)
1087 2574 : idx_homo(kk) = buffer_entries(iproc)%indx(ii, 1)
1088 2574 : idx_virt(kk) = buffer_entries(iproc)%indx(ii, 2)
1089 2574 : eigvec_entries(kk) = buffer_entries(iproc)%msg(ii)
1090 4228 : kk = kk + 1
1091 : END DO
1092 : END IF
1093 : END DO
1094 :
1095 : !Deallocate all the used arrays
1096 5100 : DO iproc = 0, para_env%num_pe - 1
1097 3400 : DEALLOCATE (buffer_entries(iproc)%msg)
1098 5100 : DEALLOCATE (buffer_entries(iproc)%indx)
1099 : END DO
1100 6800 : DEALLOCATE (buffer_entries)
1101 1700 : DEALLOCATE (num_entries_to_comm)
1102 1700 : NULLIFY (row_indices)
1103 1700 : NULLIFY (col_indices)
1104 :
1105 : !Now sort the results according to the involved singleparticle orbitals
1106 : ! (homo first, then virtual)
1107 1700 : CALL sort_excitations(idx_homo, idx_virt, eigvec_entries)
1108 :
1109 1700 : CALL timestop(handle)
1110 :
1111 1700 : END SUBROUTINE
1112 :
1113 : ! **************************************************************************************************
1114 : !> \brief Reads cutoffs for BSE from mp2_env and compares to energies in Eigenval to extract
1115 : !> reduced homo/virtual and
1116 : !> \param Eigenval array (1d) with energies, can be e.g. from GW or DFT
1117 : !> \param homo Total number of occupied orbitals
1118 : !> \param virtual Total number of unoccupied orbitals
1119 : !> \param homo_red Total number of occupied orbitals to include after cutoff
1120 : !> \param virt_red Total number of unoccupied orbitals to include after ctuoff
1121 : !> \param homo_incl First occupied index to include after cutoff
1122 : !> \param virt_incl Last unoccupied index to include after cutoff
1123 : !> \param mp2_env ...
1124 : ! **************************************************************************************************
1125 84 : SUBROUTINE determine_cutoff_indices(Eigenval, &
1126 : homo, virtual, &
1127 : homo_red, virt_red, &
1128 : homo_incl, virt_incl, &
1129 : mp2_env)
1130 :
1131 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval
1132 : INTEGER, INTENT(IN) :: homo, virtual
1133 : INTEGER, INTENT(OUT) :: homo_red, virt_red, homo_incl, virt_incl
1134 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1135 :
1136 : CHARACTER(LEN=*), PARAMETER :: routineN = 'determine_cutoff_indices'
1137 :
1138 : INTEGER :: handle, i_homo, j_virt
1139 :
1140 84 : CALL timeset(routineN, handle)
1141 : ! Determine index in homo and virtual for truncation
1142 : ! Uses indices of outermost orbitals within energy range (-mp2_env%bse%bse_cutoff_occ,mp2_env%bse%bse_cutoff_empty)
1143 84 : IF (mp2_env%bse%bse_cutoff_occ > 0 .OR. mp2_env%bse%bse_cutoff_empty > 0) THEN
1144 : IF (-mp2_env%bse%bse_cutoff_occ .LT. Eigenval(1) - Eigenval(homo) &
1145 84 : .OR. mp2_env%bse%bse_cutoff_occ < 0) THEN
1146 84 : homo_red = homo
1147 84 : homo_incl = 1
1148 : ELSE
1149 0 : homo_incl = 1
1150 0 : DO i_homo = 1, homo
1151 0 : IF (Eigenval(i_homo) - Eigenval(homo) .GT. -mp2_env%bse%bse_cutoff_occ) THEN
1152 0 : homo_incl = i_homo
1153 0 : EXIT
1154 : END IF
1155 : END DO
1156 0 : homo_red = homo - homo_incl + 1
1157 : END IF
1158 :
1159 : IF (mp2_env%bse%bse_cutoff_empty .GT. Eigenval(homo + virtual) - Eigenval(homo + 1) &
1160 84 : .OR. mp2_env%bse%bse_cutoff_empty < 0) THEN
1161 0 : virt_red = virtual
1162 0 : virt_incl = virtual
1163 : ELSE
1164 84 : virt_incl = homo + 1
1165 1092 : DO j_virt = 1, virtual
1166 1092 : IF (Eigenval(homo + j_virt) - Eigenval(homo + 1) .GT. mp2_env%bse%bse_cutoff_empty) THEN
1167 84 : virt_incl = j_virt - 1
1168 84 : EXIT
1169 : END IF
1170 : END DO
1171 84 : virt_red = virt_incl
1172 : END IF
1173 : ELSE
1174 0 : homo_red = homo
1175 0 : virt_red = virtual
1176 0 : homo_incl = 1
1177 0 : virt_incl = virtual
1178 : END IF
1179 :
1180 84 : CALL timestop(handle)
1181 :
1182 84 : END SUBROUTINE
1183 :
1184 : ! **************************************************************************************************
1185 : !> \brief Determines indices within the given energy cutoffs and truncates Eigenvalues and matrices
1186 : !> \param fm_mat_S_ia_bse ...
1187 : !> \param fm_mat_S_ij_bse ...
1188 : !> \param fm_mat_S_ab_bse ...
1189 : !> \param fm_mat_S_trunc ...
1190 : !> \param fm_mat_S_ij_trunc ...
1191 : !> \param fm_mat_S_ab_trunc ...
1192 : !> \param Eigenval_scf ...
1193 : !> \param Eigenval ...
1194 : !> \param Eigenval_reduced ...
1195 : !> \param homo ...
1196 : !> \param virtual ...
1197 : !> \param dimen_RI ...
1198 : !> \param unit_nr ...
1199 : !> \param bse_lev_virt ...
1200 : !> \param homo_red ...
1201 : !> \param virt_red ...
1202 : !> \param mp2_env ...
1203 : ! **************************************************************************************************
1204 210 : SUBROUTINE truncate_BSE_matrices(fm_mat_S_ia_bse, fm_mat_S_ij_bse, fm_mat_S_ab_bse, &
1205 : fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
1206 42 : Eigenval_scf, Eigenval, Eigenval_reduced, &
1207 : homo, virtual, dimen_RI, unit_nr, &
1208 : bse_lev_virt, &
1209 : homo_red, virt_red, &
1210 : mp2_env)
1211 :
1212 : TYPE(cp_fm_type), INTENT(IN) :: fm_mat_S_ia_bse, fm_mat_S_ij_bse, &
1213 : fm_mat_S_ab_bse
1214 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_S_trunc, fm_mat_S_ij_trunc, &
1215 : fm_mat_S_ab_trunc
1216 : REAL(KIND=dp), DIMENSION(:) :: Eigenval_scf, Eigenval
1217 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: Eigenval_reduced
1218 : INTEGER, INTENT(IN) :: homo, virtual, dimen_RI, unit_nr, &
1219 : bse_lev_virt
1220 : INTEGER, INTENT(OUT) :: homo_red, virt_red
1221 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1222 :
1223 : CHARACTER(LEN=*), PARAMETER :: routineN = 'truncate_BSE_matrices'
1224 :
1225 : INTEGER :: handle, homo_incl, virt_incl
1226 : TYPE(cp_blacs_env_type), POINTER :: context
1227 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_ab, fm_struct_ia, fm_struct_ij
1228 : TYPE(mp_para_env_type), POINTER :: para_env
1229 :
1230 42 : CALL timeset(routineN, handle)
1231 :
1232 : ! Determine index in homo and virtual for truncation
1233 : ! Uses indices of outermost orbitals within energy range (-mp2_env%bse%bse_cutoff_occ,mp2_env%bse%bse_cutoff_empty)
1234 :
1235 : CALL determine_cutoff_indices(Eigenval_scf, &
1236 : homo, virtual, &
1237 : homo_red, virt_red, &
1238 : homo_incl, virt_incl, &
1239 42 : mp2_env)
1240 :
1241 42 : IF (unit_nr > 0) THEN
1242 21 : IF (mp2_env%bse%bse_cutoff_occ > 0) THEN
1243 21 : WRITE (unit_nr, '(T2,A4,T7,A29,T71,F10.3)') 'BSE|', 'Cutoff occupied orbitals [eV]', &
1244 42 : mp2_env%bse%bse_cutoff_occ*evolt
1245 : ELSE
1246 0 : WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No cutoff given for occupied orbitals'
1247 : END IF
1248 21 : IF (mp2_env%bse%bse_cutoff_empty > 0) THEN
1249 21 : WRITE (unit_nr, '(T2,A4,T7,A26,T71,F10.3)') 'BSE|', 'Cutoff empty orbitals [eV]', &
1250 42 : mp2_env%bse%bse_cutoff_empty*evolt
1251 : ELSE
1252 0 : WRITE (unit_nr, '(T2,A4,T7,A34)') 'BSE|', 'No cutoff given for empty orbitals'
1253 : END IF
1254 21 : WRITE (unit_nr, '(T2,A4,T7,A20,T71,I10)') 'BSE|', 'First occupied index', homo_incl
1255 21 : WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Last empty index (not MO index!)', virt_incl
1256 21 : WRITE (unit_nr, '(T2,A4,T7,A35,T71,F10.3)') 'BSE|', 'Energy of first occupied index [eV]', Eigenval(homo_incl)*evolt
1257 21 : WRITE (unit_nr, '(T2,A4,T7,A31,T71,F10.3)') 'BSE|', 'Energy of last empty index [eV]', Eigenval(homo + virt_incl)*evolt
1258 21 : WRITE (unit_nr, '(T2,A4,T7,A54,T71,F10.3)') 'BSE|', 'Energy difference of first occupied index to HOMO [eV]', &
1259 42 : -(Eigenval(homo_incl) - Eigenval(homo))*evolt
1260 21 : WRITE (unit_nr, '(T2,A4,T7,A50,T71,F10.3)') 'BSE|', 'Energy difference of last empty index to LUMO [eV]', &
1261 42 : (Eigenval(homo + virt_incl) - Eigenval(homo + 1))*evolt
1262 21 : WRITE (unit_nr, '(T2,A4,T7,A35,T71,I10)') 'BSE|', 'Number of GW-corrected occupied MOs', mp2_env%ri_g0w0%corr_mos_occ
1263 21 : WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Number of GW-corrected empty MOs', mp2_env%ri_g0w0%corr_mos_virt
1264 21 : WRITE (unit_nr, '(T2,A4)') 'BSE|'
1265 : END IF
1266 42 : IF (unit_nr > 0) THEN
1267 21 : IF (homo - homo_incl + 1 > mp2_env%ri_g0w0%corr_mos_occ) THEN
1268 0 : CPABORT("Number of GW-corrected occupied MOs too small for chosen BSE cutoff")
1269 : END IF
1270 21 : IF (virt_incl > mp2_env%ri_g0w0%corr_mos_virt) THEN
1271 0 : CPABORT("Number of GW-corrected virtual MOs too small for chosen BSE cutoff")
1272 : END IF
1273 : END IF
1274 : !Truncate full fm_S matrices
1275 : !Allocate new truncated matrices of proper size
1276 42 : para_env => fm_mat_S_ia_bse%matrix_struct%para_env
1277 42 : context => fm_mat_S_ia_bse%matrix_struct%context
1278 :
1279 42 : CALL cp_fm_struct_create(fm_struct_ia, para_env, context, dimen_RI, homo_red*virt_red)
1280 42 : CALL cp_fm_struct_create(fm_struct_ij, para_env, context, dimen_RI, homo_red*homo_red)
1281 42 : CALL cp_fm_struct_create(fm_struct_ab, para_env, context, dimen_RI, virt_red*virt_red)
1282 :
1283 42 : CALL cp_fm_create(fm_mat_S_trunc, fm_struct_ia, "fm_S_trunc")
1284 42 : CALL cp_fm_create(fm_mat_S_ij_trunc, fm_struct_ij, "fm_S_ij_trunc")
1285 42 : CALL cp_fm_create(fm_mat_S_ab_trunc, fm_struct_ab, "fm_S_ab_trunc")
1286 :
1287 : !Copy parts of original matrices to truncated ones
1288 42 : IF (mp2_env%bse%bse_cutoff_occ > 0 .OR. mp2_env%bse%bse_cutoff_empty > 0) THEN
1289 : !Truncate eigenvals
1290 126 : ALLOCATE (Eigenval_reduced(homo_red + virt_red))
1291 : ! Include USE_KS_ENERGIES input
1292 42 : IF (mp2_env%bse%use_ks_energies) THEN
1293 0 : Eigenval_reduced(:) = Eigenval_scf(homo_incl:homo + virt_incl)
1294 : ELSE
1295 714 : Eigenval_reduced(:) = Eigenval(homo_incl:homo + virt_incl)
1296 : END IF
1297 :
1298 : CALL truncate_fm(fm_mat_S_trunc, fm_mat_S_ia_bse, virtual, &
1299 : homo_red, virt_red, unit_nr, mp2_env, &
1300 42 : nrow_offset=homo_incl)
1301 : CALL truncate_fm(fm_mat_S_ij_trunc, fm_mat_S_ij_bse, homo, &
1302 : homo_red, homo_red, unit_nr, mp2_env, &
1303 42 : homo_incl, homo_incl)
1304 : CALL truncate_fm(fm_mat_S_ab_trunc, fm_mat_S_ab_bse, bse_lev_virt, &
1305 42 : virt_red, virt_red, unit_nr, mp2_env)
1306 :
1307 : ELSE
1308 0 : IF (unit_nr > 0) THEN
1309 0 : WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No truncation of BSE matrices applied'
1310 0 : WRITE (unit_nr, '(T2,A4)') 'BSE|'
1311 : END IF
1312 0 : ALLOCATE (Eigenval_reduced(homo_red + virt_red))
1313 : ! Include USE_KS_ENERGIES input
1314 0 : IF (mp2_env%bse%use_ks_energies) THEN
1315 0 : Eigenval_reduced(:) = Eigenval_scf(:)
1316 : ELSE
1317 0 : Eigenval_reduced(:) = Eigenval(:)
1318 : END IF
1319 : CALL cp_fm_to_fm_submat_general(fm_mat_S_ia_bse, fm_mat_S_trunc, dimen_RI, homo_red*virt_red, &
1320 0 : 1, 1, 1, 1, context)
1321 : CALL cp_fm_to_fm_submat_general(fm_mat_S_ij_bse, fm_mat_S_ij_trunc, dimen_RI, homo_red*homo_red, &
1322 0 : 1, 1, 1, 1, context)
1323 : CALL cp_fm_to_fm_submat_general(fm_mat_S_ab_bse, fm_mat_S_ab_trunc, dimen_RI, virt_red*virt_red, &
1324 0 : 1, 1, 1, 1, context)
1325 : END IF
1326 :
1327 42 : CALL cp_fm_struct_release(fm_struct_ia)
1328 42 : CALL cp_fm_struct_release(fm_struct_ij)
1329 42 : CALL cp_fm_struct_release(fm_struct_ab)
1330 :
1331 42 : NULLIFY (para_env)
1332 42 : NULLIFY (context)
1333 :
1334 42 : CALL timestop(handle)
1335 :
1336 42 : END SUBROUTINE truncate_BSE_matrices
1337 :
1338 : ! **************************************************************************************************
1339 : !> \brief ...
1340 : !> \param fm_eigvec ...
1341 : !> \param fm_eigvec_reshuffled ...
1342 : !> \param homo ...
1343 : !> \param virtual ...
1344 : !> \param n_exc ...
1345 : !> \param do_transpose ...
1346 : !> \param unit_nr ...
1347 : !> \param mp2_env ...
1348 : ! **************************************************************************************************
1349 900 : SUBROUTINE reshuffle_eigvec(fm_eigvec, fm_eigvec_reshuffled, homo, virtual, n_exc, do_transpose, &
1350 : unit_nr, mp2_env)
1351 :
1352 : TYPE(cp_fm_type), INTENT(IN) :: fm_eigvec
1353 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_eigvec_reshuffled
1354 : INTEGER, INTENT(IN) :: homo, virtual, n_exc
1355 : LOGICAL, INTENT(IN) :: do_transpose
1356 : INTEGER, INTENT(IN) :: unit_nr
1357 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1358 :
1359 : CHARACTER(LEN=*), PARAMETER :: routineN = 'reshuffle_eigvec'
1360 :
1361 : INTEGER :: handle, my_m_col, my_n_row
1362 : INTEGER, DIMENSION(4) :: reordering
1363 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_eigvec_col, &
1364 : fm_struct_eigvec_reshuffled
1365 : TYPE(cp_fm_type) :: fm_eigvec_col
1366 :
1367 300 : CALL timeset(routineN, handle)
1368 :
1369 : ! Define reordering:
1370 : ! (ia,11) to (a1,i1) for transposition
1371 : ! (ia,11) to (i1,a1) for default
1372 300 : IF (do_transpose) THEN
1373 50 : reordering = (/2, 3, 1, 4/)
1374 50 : my_n_row = virtual
1375 50 : my_m_col = homo
1376 : ELSE
1377 250 : reordering = (/1, 3, 2, 4/)
1378 250 : my_n_row = homo
1379 250 : my_m_col = virtual
1380 : END IF
1381 :
1382 : CALL cp_fm_struct_create(fm_struct_eigvec_col, &
1383 : fm_eigvec%matrix_struct%para_env, fm_eigvec%matrix_struct%context, &
1384 300 : homo*virtual, 1)
1385 : CALL cp_fm_struct_create(fm_struct_eigvec_reshuffled, &
1386 : fm_eigvec%matrix_struct%para_env, fm_eigvec%matrix_struct%context, &
1387 300 : my_n_row, my_m_col)
1388 :
1389 : ! Resort indices
1390 300 : CALL cp_fm_create(fm_eigvec_col, fm_struct_eigvec_col, name="BSE_column_vector")
1391 300 : CALL cp_fm_set_all(fm_eigvec_col, 0.0_dp)
1392 300 : CALL cp_fm_create(fm_eigvec_reshuffled, fm_struct_eigvec_reshuffled, name="BSE_reshuffled_eigenvector")
1393 300 : CALL cp_fm_set_all(fm_eigvec_reshuffled, 0.0_dp)
1394 : ! Fill matrix
1395 : CALL cp_fm_to_fm_submat(fm_eigvec, fm_eigvec_col, &
1396 : homo*virtual, 1, &
1397 : 1, n_exc, &
1398 300 : 1, 1)
1399 : ! Reshuffle
1400 : CALL fm_general_add_bse(fm_eigvec_reshuffled, fm_eigvec_col, 1.0_dp, &
1401 : virtual, 1, &
1402 : 1, 1, &
1403 300 : unit_nr, reordering, mp2_env)
1404 :
1405 300 : CALL cp_fm_release(fm_eigvec_col)
1406 300 : CALL cp_fm_struct_release(fm_struct_eigvec_col)
1407 300 : CALL cp_fm_struct_release(fm_struct_eigvec_reshuffled)
1408 :
1409 300 : CALL timestop(handle)
1410 :
1411 300 : END SUBROUTINE reshuffle_eigvec
1412 :
1413 : ! **************************************************************************************************
1414 : !> \brief Borrowed from the tddfpt module with slight adaptions
1415 : !> \param qs_env ...
1416 : !> \param mos ...
1417 : !> \param istate ...
1418 : !> \param info_approximation ...
1419 : !> \param stride ...
1420 : !> \param append_cube ...
1421 : !> \param print_section ...
1422 : ! **************************************************************************************************
1423 0 : SUBROUTINE print_bse_nto_cubes(qs_env, mos, istate, info_approximation, &
1424 : stride, append_cube, print_section)
1425 :
1426 : TYPE(qs_environment_type), POINTER :: qs_env
1427 : TYPE(mo_set_type), DIMENSION(:), INTENT(IN) :: mos
1428 : INTEGER, INTENT(IN) :: istate
1429 : CHARACTER(LEN=10) :: info_approximation
1430 : INTEGER, DIMENSION(:), POINTER :: stride
1431 : LOGICAL, INTENT(IN) :: append_cube
1432 : TYPE(section_vals_type), POINTER :: print_section
1433 :
1434 : CHARACTER(LEN=*), PARAMETER :: routineN = 'print_bse_nto_cubes'
1435 :
1436 : CHARACTER(LEN=default_path_length) :: filename, info_approx_trunc, &
1437 : my_pos_cube, title
1438 : INTEGER :: handle, i, iset, nmo, unit_nr_cube
1439 : LOGICAL :: mpi_io
1440 0 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1441 : TYPE(cell_type), POINTER :: cell
1442 : TYPE(cp_fm_type), POINTER :: mo_coeff
1443 : TYPE(cp_logger_type), POINTER :: logger
1444 : TYPE(dft_control_type), POINTER :: dft_control
1445 : TYPE(particle_list_type), POINTER :: particles
1446 0 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1447 : TYPE(pw_c1d_gs_type) :: wf_g
1448 : TYPE(pw_env_type), POINTER :: pw_env
1449 0 : TYPE(pw_pool_p_type), DIMENSION(:), POINTER :: pw_pools
1450 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
1451 : TYPE(pw_r3d_rs_type) :: wf_r
1452 0 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1453 : TYPE(qs_subsys_type), POINTER :: subsys
1454 :
1455 0 : logger => cp_get_default_logger()
1456 0 : CALL timeset(routineN, handle)
1457 :
1458 0 : CALL get_qs_env(qs_env=qs_env, dft_control=dft_control, pw_env=pw_env)
1459 0 : CALL pw_env_get(pw_env, auxbas_pw_pool=auxbas_pw_pool, pw_pools=pw_pools)
1460 0 : CALL auxbas_pw_pool%create_pw(wf_r)
1461 0 : CALL auxbas_pw_pool%create_pw(wf_g)
1462 :
1463 0 : CALL get_qs_env(qs_env, subsys=subsys)
1464 0 : CALL qs_subsys_get(subsys, particles=particles)
1465 :
1466 0 : my_pos_cube = "REWIND"
1467 0 : IF (append_cube) THEN
1468 0 : my_pos_cube = "APPEND"
1469 : END IF
1470 :
1471 : CALL get_qs_env(qs_env=qs_env, &
1472 : atomic_kind_set=atomic_kind_set, &
1473 : qs_kind_set=qs_kind_set, &
1474 : cell=cell, &
1475 0 : particle_set=particle_set)
1476 :
1477 0 : DO iset = 1, 2
1478 0 : CALL get_mo_set(mo_set=mos(iset), mo_coeff=mo_coeff, nmo=nmo)
1479 0 : DO i = 1, nmo
1480 : CALL calculate_wavefunction(mo_coeff, i, wf_r, wf_g, atomic_kind_set, qs_kind_set, &
1481 0 : cell, dft_control, particle_set, pw_env)
1482 0 : IF (iset == 1) THEN
1483 0 : WRITE (filename, '(A6,I3.3,A5,I2.2,a11)') "_NEXC_", istate, "_NTO_", i, "_Hole_State"
1484 0 : ELSEIF (iset == 2) THEN
1485 0 : WRITE (filename, '(A6,I3.3,A5,I2.2,a15)') "_NEXC_", istate, "_NTO_", i, "_Particle_State"
1486 : END IF
1487 0 : info_approx_trunc = TRIM(ADJUSTL(info_approximation))
1488 0 : info_approx_trunc = info_approx_trunc(2:LEN_TRIM(info_approx_trunc) - 1)
1489 0 : filename = TRIM(info_approx_trunc)//TRIM(filename)
1490 0 : mpi_io = .TRUE.
1491 : unit_nr_cube = cp_print_key_unit_nr(logger, print_section, '', extension=".cube", &
1492 : middle_name=TRIM(filename), file_position=my_pos_cube, &
1493 0 : log_filename=.FALSE., ignore_should_output=.TRUE., mpi_io=mpi_io)
1494 0 : IF (iset == 1) THEN
1495 0 : WRITE (title, *) "Natural Transition Orbital Hole State", i
1496 0 : ELSEIF (iset == 2) THEN
1497 0 : WRITE (title, *) "Natural Transition Orbital Particle State", i
1498 : END IF
1499 0 : CALL cp_pw_to_cube(wf_r, unit_nr_cube, title, particles=particles, stride=stride, mpi_io=mpi_io)
1500 : CALL cp_print_key_finished_output(unit_nr_cube, logger, print_section, '', &
1501 0 : ignore_should_output=.TRUE., mpi_io=mpi_io)
1502 : END DO
1503 : END DO
1504 :
1505 0 : CALL auxbas_pw_pool%give_back_pw(wf_g)
1506 0 : CALL auxbas_pw_pool%give_back_pw(wf_r)
1507 :
1508 0 : CALL timestop(handle)
1509 0 : END SUBROUTINE print_bse_nto_cubes
1510 :
1511 : ! **************************************************************************************************
1512 : !> \brief Checks BSE input section and adapts them if necessary
1513 : !> \param homo ...
1514 : !> \param virtual ...
1515 : !> \param unit_nr ...
1516 : !> \param mp2_env ...
1517 : !> \param qs_env ...
1518 : ! **************************************************************************************************
1519 42 : SUBROUTINE adapt_BSE_input_params(homo, virtual, unit_nr, mp2_env, qs_env)
1520 :
1521 : INTEGER, INTENT(IN) :: homo, virtual, unit_nr
1522 : TYPE(mp2_type) :: mp2_env
1523 : TYPE(qs_environment_type), POINTER :: qs_env
1524 :
1525 : CHARACTER(LEN=*), PARAMETER :: routineN = 'adapt_BSE_input_params'
1526 :
1527 : INTEGER :: handle, i, j, n, ndim_periodic_cell, &
1528 : ndim_periodic_poisson, &
1529 : num_state_list_exceptions
1530 : TYPE(cell_type), POINTER :: cell_ref
1531 : TYPE(pw_env_type), POINTER :: pw_env
1532 : TYPE(pw_poisson_type), POINTER :: poisson_env
1533 :
1534 42 : CALL timeset(routineN, handle)
1535 : ! Get environment infos for later usage
1536 42 : NULLIFY (pw_env, cell_ref, poisson_env)
1537 42 : CALL get_qs_env(qs_env, pw_env=pw_env, cell_ref=cell_ref)
1538 42 : CALL pw_env_get(pw_env, poisson_env=poisson_env)
1539 168 : ndim_periodic_poisson = COUNT(poisson_env%parameters%periodic == 1)
1540 168 : ndim_periodic_cell = SUM(cell_ref%perd(1:3)) ! Borrowed from cell_methods.F/write_cell_low
1541 :
1542 : ! Handle negative NUM_PRINT_EXC
1543 42 : IF (mp2_env%bse%num_print_exc < 0 .OR. &
1544 : mp2_env%bse%num_print_exc > homo*virtual) THEN
1545 0 : mp2_env%bse%num_print_exc = homo*virtual
1546 0 : IF (unit_nr > 0) THEN
1547 : CALL cp_hint(__LOCATION__, &
1548 : "Keyword NUM_PRINT_EXC is either negative or too large. "// &
1549 0 : "Printing all computed excitations.")
1550 : END IF
1551 : END IF
1552 :
1553 : ! Default to NUM_PRINT_EXC if too large or negative,
1554 : ! but only if NTOs are called - would be confusing for the user otherwise
1555 : ! Prepare and adapt user inputs for NTO analysis
1556 : ! Logic: Explicit state list overrides NUM_PRINT_EXC_NTOS
1557 : ! If only NUM_PRINT_EXC_NTOS is given, we write the array 1,...,NUM_PRINT_EXC_NTOS to
1558 : ! bse_nto_state_list
1559 42 : IF (mp2_env%bse%do_nto_analysis) THEN
1560 4 : IF (mp2_env%bse%explicit_nto_list) THEN
1561 0 : IF (mp2_env%bse%num_print_exc_ntos > 0) THEN
1562 0 : IF (unit_nr > 0) THEN
1563 : CALL cp_hint(__LOCATION__, &
1564 : "Keywords NUM_PRINT_EXC_NTOS and STATE_LIST are both given in input. "// &
1565 0 : "Overriding NUM_PRINT_EXC_NTOS.")
1566 : END IF
1567 : END IF
1568 : ! Check if all states are within the range
1569 : ! Count them and initialize new array afterwards
1570 0 : num_state_list_exceptions = 0
1571 0 : DO i = 1, SIZE(mp2_env%bse%bse_nto_state_list)
1572 0 : IF (mp2_env%bse%bse_nto_state_list(i) < 1 .OR. &
1573 0 : mp2_env%bse%bse_nto_state_list(i) > mp2_env%bse%num_print_exc) THEN
1574 0 : num_state_list_exceptions = num_state_list_exceptions + 1
1575 : END IF
1576 : END DO
1577 0 : IF (num_state_list_exceptions > 0) THEN
1578 0 : IF (unit_nr > 0) THEN
1579 : CALL cp_hint(__LOCATION__, &
1580 : "STATE_LIST contains indices outside the range of included excitation levels. "// &
1581 0 : "Ignoring these states.")
1582 : END IF
1583 : END IF
1584 0 : n = SIZE(mp2_env%bse%bse_nto_state_list) - num_state_list_exceptions
1585 0 : ALLOCATE (mp2_env%bse%bse_nto_state_list_final(n))
1586 0 : mp2_env%bse%bse_nto_state_list_final(:) = 0
1587 : i = 1
1588 0 : DO j = 1, SIZE(mp2_env%bse%bse_nto_state_list)
1589 0 : IF (mp2_env%bse%bse_nto_state_list(j) >= 1 .AND. &
1590 0 : mp2_env%bse%bse_nto_state_list(j) <= mp2_env%bse%num_print_exc) THEN
1591 0 : mp2_env%bse%bse_nto_state_list_final(i) = mp2_env%bse%bse_nto_state_list(j)
1592 0 : i = i + 1
1593 : END IF
1594 : END DO
1595 :
1596 0 : mp2_env%bse%num_print_exc_ntos = SIZE(mp2_env%bse%bse_nto_state_list_final)
1597 : ELSE
1598 4 : IF (mp2_env%bse%num_print_exc_ntos > mp2_env%bse%num_print_exc .OR. &
1599 : mp2_env%bse%num_print_exc_ntos < 0) THEN
1600 4 : mp2_env%bse%num_print_exc_ntos = mp2_env%bse%num_print_exc
1601 : END IF
1602 12 : ALLOCATE (mp2_env%bse%bse_nto_state_list_final(mp2_env%bse%num_print_exc_ntos))
1603 104 : DO i = 1, mp2_env%bse%num_print_exc_ntos
1604 104 : mp2_env%bse%bse_nto_state_list_final(i) = i
1605 : END DO
1606 : END IF
1607 : END IF
1608 :
1609 : ! Takes care of triplet states, when oscillator strengths are 0
1610 42 : IF (mp2_env%bse%bse_spin_config /= 0 .AND. &
1611 : mp2_env%bse%eps_nto_osc_str > 0) THEN
1612 0 : IF (unit_nr > 0) THEN
1613 : CALL cp_warn(__LOCATION__, &
1614 : "Cannot apply EPS_OSC_STR for Triplet excitations. "// &
1615 0 : "Resetting EPS_OSC_STR to default.")
1616 : END IF
1617 0 : mp2_env%bse%eps_nto_osc_str = -1.0_dp
1618 : END IF
1619 :
1620 : ! Take care of number for computed exciton descriptors
1621 42 : IF (mp2_env%bse%num_print_exc_descr < 0 .OR. &
1622 : mp2_env%bse%num_print_exc_descr > mp2_env%bse%num_print_exc) THEN
1623 4 : IF (unit_nr > 0) THEN
1624 : CALL cp_hint(__LOCATION__, &
1625 : "Keyword NUM_PRINT_EXC_DESCR is either negative or too large. "// &
1626 2 : "Printing exciton descriptors up to NUM_PRINT_EXC.")
1627 : END IF
1628 4 : mp2_env%bse%num_print_exc_descr = mp2_env%bse%num_print_exc
1629 : END IF
1630 :
1631 : ! Handle screening factor options
1632 42 : IF (mp2_env%BSE%screening_factor > 0.0_dp) THEN
1633 2 : IF (mp2_env%BSE%screening_method /= bse_screening_alpha) THEN
1634 0 : IF (unit_nr > 0) THEN
1635 : CALL cp_warn(__LOCATION__, &
1636 : "Screening factor is only supported for &SCREENING_IN_W ALPHA. "// &
1637 0 : "Resetting SCREENING_IN_W to ALPHA.")
1638 : END IF
1639 0 : mp2_env%BSE%screening_method = bse_screening_alpha
1640 : END IF
1641 2 : IF (mp2_env%BSE%screening_factor > 1.0_dp) THEN
1642 0 : IF (unit_nr > 0) THEN
1643 : CALL cp_warn(__LOCATION__, &
1644 0 : "Screening factor is larger than 1.0. ")
1645 : END IF
1646 : END IF
1647 : END IF
1648 :
1649 42 : IF (mp2_env%BSE%screening_factor < 0.0_dp .AND. &
1650 : mp2_env%BSE%screening_method == bse_screening_alpha) THEN
1651 0 : IF (unit_nr > 0) THEN
1652 : CALL cp_warn(__LOCATION__, &
1653 0 : "Screening factor is negative. Defaulting to 0.25")
1654 : END IF
1655 0 : mp2_env%BSE%screening_factor = 0.25_dp
1656 : END IF
1657 :
1658 42 : IF (mp2_env%BSE%screening_factor == 0.0_dp) THEN
1659 : ! Use RPA internally in this case
1660 0 : mp2_env%BSE%screening_method = bse_screening_rpa
1661 : END IF
1662 42 : IF (mp2_env%BSE%screening_factor == 1.0_dp) THEN
1663 : ! Use TDHF internally in this case
1664 0 : mp2_env%BSE%screening_method = bse_screening_tdhf
1665 : END IF
1666 :
1667 : ! Add warning for usage of KS energies
1668 42 : IF (mp2_env%bse%use_ks_energies) THEN
1669 0 : IF (unit_nr > 0) THEN
1670 : CALL cp_warn(__LOCATION__, &
1671 : "Using KS energies for BSE calculations. Therefore, no quantities "// &
1672 0 : "of the preceeding GW calculation enter the BSE.")
1673 : END IF
1674 : END IF
1675 :
1676 : ! Add warning if periodic calculation is invoked
1677 42 : IF (ndim_periodic_poisson /= 0) THEN
1678 0 : IF (unit_nr > 0) THEN
1679 : CALL cp_warn(__LOCATION__, &
1680 : "Poisson solver should be invoked by PERIODIC NONE. "// &
1681 : "The applied length gauge might give misleading results for "// &
1682 0 : "oscillator strengths.")
1683 : END IF
1684 : END IF
1685 42 : IF (ndim_periodic_cell /= 0) THEN
1686 0 : IF (unit_nr > 0) THEN
1687 : CALL cp_warn(__LOCATION__, &
1688 : "CELL in SUBSYS should be invoked with PERIODIC NONE. "// &
1689 : "The applied length gauge might give misleading results for "// &
1690 0 : "oscillator strengths.")
1691 : END IF
1692 : END IF
1693 :
1694 42 : CALL timestop(handle)
1695 42 : END SUBROUTINE adapt_BSE_input_params
1696 :
1697 : ! **************************************************************************************************
1698 :
1699 : ! **************************************************************************************************
1700 : !> \brief ...
1701 : !> \param fm_multipole_ai_trunc ...
1702 : !> \param fm_multipole_ij_trunc ...
1703 : !> \param fm_multipole_ab_trunc ...
1704 : !> \param qs_env ...
1705 : !> \param mo_coeff ...
1706 : !> \param rpoint ...
1707 : !> \param n_moments ...
1708 : !> \param homo_red ...
1709 : !> \param virtual_red ...
1710 : !> \param context_BSE ...
1711 : ! **************************************************************************************************
1712 48 : SUBROUTINE get_multipoles_mo(fm_multipole_ai_trunc, fm_multipole_ij_trunc, fm_multipole_ab_trunc, &
1713 48 : qs_env, mo_coeff, rpoint, n_moments, &
1714 : homo_red, virtual_red, context_BSE)
1715 :
1716 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
1717 : INTENT(INOUT) :: fm_multipole_ai_trunc, &
1718 : fm_multipole_ij_trunc, &
1719 : fm_multipole_ab_trunc
1720 : TYPE(qs_environment_type), POINTER :: qs_env
1721 : TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1722 : REAL(dp), ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: rpoint
1723 : INTEGER, INTENT(IN) :: n_moments, homo_red, virtual_red
1724 : TYPE(cp_blacs_env_type), POINTER :: context_BSE
1725 :
1726 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_multipoles_mo'
1727 :
1728 : INTEGER :: handle, idir, n_multipole, n_occ, &
1729 : n_virt, nao
1730 48 : REAL(KIND=dp), DIMENSION(:), POINTER :: ref_point
1731 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_mp_ab_trunc, fm_struct_mp_ai_trunc, &
1732 : fm_struct_mp_ij_trunc, fm_struct_multipoles_ao, fm_struct_nao_nao
1733 : TYPE(cp_fm_type) :: fm_work
1734 48 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_multipole_per_dir
1735 48 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_multipole, matrix_s
1736 48 : TYPE(mo_set_type), DIMENSION(:), POINTER :: mos
1737 : TYPE(mp_para_env_type), POINTER :: para_env_BSE
1738 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1739 48 : POINTER :: sab_orb
1740 :
1741 48 : CALL timeset(routineN, handle)
1742 :
1743 : !First, we calculate the AO dipoles
1744 48 : NULLIFY (sab_orb, matrix_s)
1745 : CALL get_qs_env(qs_env, &
1746 : mos=mos, &
1747 : matrix_s=matrix_s, &
1748 48 : sab_orb=sab_orb)
1749 :
1750 : ! Use the same blacs environment as for the MO coefficients to ensure correct multiplication dbcsr x fm later on
1751 48 : fm_struct_multipoles_ao => mos(1)%mo_coeff%matrix_struct
1752 : ! BSE has different contexts and blacsenvs
1753 48 : para_env_BSE => context_BSE%para_env
1754 : ! Get size of multipole tensor
1755 48 : n_multipole = (6 + 11*n_moments + 6*n_moments**2 + n_moments**3)/6 - 1
1756 48 : NULLIFY (matrix_multipole)
1757 48 : CALL dbcsr_allocate_matrix_set(matrix_multipole, n_multipole)
1758 324 : ALLOCATE (fm_multipole_per_dir(n_multipole))
1759 228 : DO idir = 1, n_multipole
1760 180 : CALL dbcsr_init_p(matrix_multipole(idir)%matrix)
1761 : CALL dbcsr_create(matrix_multipole(idir)%matrix, name="ao_multipole", &
1762 180 : template=matrix_s(1)%matrix, matrix_type=dbcsr_type_symmetric)
1763 180 : CALL cp_dbcsr_alloc_block_from_nbl(matrix_multipole(idir)%matrix, sab_orb)
1764 228 : CALL dbcsr_set(matrix_multipole(idir)%matrix, 0._dp)
1765 : END DO
1766 :
1767 48 : CALL get_reference_point(rpoint, qs_env=qs_env, reference=use_mom_ref_coac, ref_point=ref_point)
1768 :
1769 48 : CALL build_local_moment_matrix(qs_env, matrix_multipole, n_moments, ref_point=rpoint)
1770 :
1771 48 : NULLIFY (sab_orb)
1772 :
1773 : ! Now we transform them to MO
1774 : ! n_occ is the number of occupied MOs, nao the number of all AOs
1775 : ! Writing homo to n_occ instead if nmo,
1776 : ! takes care of ADDED_MOS, which would overwrite nmo, if invoked
1777 48 : CALL get_mo_set(mo_set=mos(1), homo=n_occ, nao=nao)
1778 48 : n_virt = nao - n_occ
1779 :
1780 : ! At the end, we need four different layouts of matrices in this multiplication, e.g. for a dipole:
1781 : ! D_pq = full multipole matrix for occupied and unoccupied
1782 : ! Final result:D_pq= C_{mu p} <\mu|\vec{r}|\nu> C_{\nu q} EQ.I
1783 : ! \_______/ \___________/ \______/
1784 : ! fm_coeff matrix_multipole fm_coeff
1785 : ! (EQ.Ia) (EQ.Ib) (EQ.Ia)
1786 : ! Intermediate work matrices:
1787 : ! fm_work = <\mu|\vec{r}|\nu> C_{\nu q} EQ.II
1788 :
1789 : ! Struct for the full multipole matrix
1790 : CALL cp_fm_struct_create(fm_struct_nao_nao, &
1791 : fm_struct_multipoles_ao%para_env, fm_struct_multipoles_ao%context, &
1792 48 : nao, nao)
1793 :
1794 : ! At the very end, we copy the multipoles corresponding to truncated BSE indices in i and a
1795 : CALL cp_fm_struct_create(fm_struct_mp_ai_trunc, para_env_BSE, &
1796 48 : context_BSE, virtual_red, homo_red)
1797 : CALL cp_fm_struct_create(fm_struct_mp_ij_trunc, para_env_BSE, &
1798 48 : context_BSE, homo_red, homo_red)
1799 : CALL cp_fm_struct_create(fm_struct_mp_ab_trunc, para_env_BSE, &
1800 48 : context_BSE, virtual_red, virtual_red)
1801 228 : DO idir = 1, n_multipole
1802 : CALL cp_fm_create(fm_multipole_ai_trunc(idir), matrix_struct=fm_struct_mp_ai_trunc, &
1803 180 : name="dipoles_mo_ai_trunc")
1804 180 : CALL cp_fm_set_all(fm_multipole_ai_trunc(idir), 0.0_dp)
1805 : CALL cp_fm_create(fm_multipole_ij_trunc(idir), matrix_struct=fm_struct_mp_ij_trunc, &
1806 180 : name="dipoles_mo_ij_trunc")
1807 180 : CALL cp_fm_set_all(fm_multipole_ij_trunc(idir), 0.0_dp)
1808 : CALL cp_fm_create(fm_multipole_ab_trunc(idir), matrix_struct=fm_struct_mp_ab_trunc, &
1809 180 : name="dipoles_mo_ab_trunc")
1810 228 : CALL cp_fm_set_all(fm_multipole_ab_trunc(idir), 0.0_dp)
1811 : END DO
1812 :
1813 : ! Need another temporary matrix to store intermediate result from right multiplication
1814 : ! D = C_{mu a} <\mu|\vec{r}|\nu> C_{\nu i}
1815 48 : CALL cp_fm_create(fm_work, matrix_struct=fm_struct_nao_nao, name="multipole_work")
1816 48 : CALL cp_fm_set_all(fm_work, 0.0_dp)
1817 :
1818 228 : DO idir = 1, n_multipole
1819 : ! Create the full multipole matrix per direction
1820 180 : CALL cp_fm_create(fm_multipole_per_dir(idir), matrix_struct=fm_struct_nao_nao, name="multipoles_mo")
1821 180 : CALL cp_fm_set_all(fm_multipole_per_dir(idir), 0.0_dp)
1822 : ! Fill final (MO) multipole matrix
1823 : CALL cp_dbcsr_sm_fm_multiply(matrix_multipole(idir)%matrix, mo_coeff(1), &
1824 180 : fm_work, ncol=nao)
1825 : ! Now obtain the multipoles by the final multiplication;
1826 : ! We do that inside the loop to obtain multipoles per axis for print
1827 180 : CALL parallel_gemm('T', 'N', nao, nao, nao, 1.0_dp, mo_coeff(1), fm_work, 0.0_dp, fm_multipole_per_dir(idir))
1828 : ! Truncate full matrix to the BSE indices
1829 : ! D_ai
1830 : CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
1831 : fm_multipole_ai_trunc(idir), &
1832 : virtual_red, &
1833 : homo_red, &
1834 : n_occ + 1, &
1835 : n_occ - homo_red + 1, &
1836 : 1, &
1837 : 1, &
1838 180 : fm_multipole_per_dir(idir)%matrix_struct%context)
1839 : ! D_ij
1840 : CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
1841 : fm_multipole_ij_trunc(idir), &
1842 : homo_red, &
1843 : homo_red, &
1844 : n_occ - homo_red + 1, &
1845 : n_occ - homo_red + 1, &
1846 : 1, &
1847 : 1, &
1848 180 : fm_multipole_per_dir(idir)%matrix_struct%context)
1849 : ! D_ab
1850 : CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
1851 : fm_multipole_ab_trunc(idir), &
1852 : virtual_red, &
1853 : virtual_red, &
1854 : n_occ + 1, &
1855 : n_occ + 1, &
1856 : 1, &
1857 : 1, &
1858 228 : fm_multipole_per_dir(idir)%matrix_struct%context)
1859 : END DO
1860 :
1861 : !Release matrices and structs
1862 48 : NULLIFY (fm_struct_multipoles_ao)
1863 48 : CALL cp_fm_struct_release(fm_struct_mp_ai_trunc)
1864 48 : CALL cp_fm_struct_release(fm_struct_mp_ij_trunc)
1865 48 : CALL cp_fm_struct_release(fm_struct_mp_ab_trunc)
1866 48 : CALL cp_fm_struct_release(fm_struct_nao_nao)
1867 228 : DO idir = 1, n_multipole
1868 228 : CALL cp_fm_release(fm_multipole_per_dir(idir))
1869 : END DO
1870 48 : DEALLOCATE (fm_multipole_per_dir)
1871 48 : CALL cp_fm_release(fm_work)
1872 48 : CALL dbcsr_deallocate_matrix_set(matrix_multipole)
1873 :
1874 48 : CALL timestop(handle)
1875 :
1876 144 : END SUBROUTINE get_multipoles_mo
1877 :
1878 : ! **************************************************************************************************
1879 : !> \brief Computes trace of form Tr{A^T B C} for exciton descriptors
1880 : !> \param fm_A Full Matrix, typically X or Y, in format homo x virtual
1881 : !> \param fm_B ...
1882 : !> \param fm_C ...
1883 : !> \param alpha ...
1884 : ! **************************************************************************************************
1885 11520 : SUBROUTINE trace_exciton_descr(fm_A, fm_B, fm_C, alpha)
1886 :
1887 : TYPE(cp_fm_type), INTENT(IN) :: fm_A, fm_B, fm_C
1888 : REAL(KIND=dp), INTENT(OUT) :: alpha
1889 :
1890 : CHARACTER(LEN=*), PARAMETER :: routineN = 'trace_exciton_descr'
1891 :
1892 : INTEGER :: handle, ncol_A, ncol_B, ncol_C, nrow_A, &
1893 : nrow_B, nrow_C
1894 : TYPE(cp_fm_type) :: fm_work_ia
1895 :
1896 1920 : CALL timeset(routineN, handle)
1897 :
1898 1920 : CALL cp_fm_create(fm_work_ia, fm_A%matrix_struct)
1899 1920 : CALL cp_fm_get_info(fm_A, nrow_global=nrow_A, ncol_global=ncol_A)
1900 1920 : CALL cp_fm_get_info(fm_B, nrow_global=nrow_B, ncol_global=ncol_B)
1901 1920 : CALL cp_fm_get_info(fm_C, nrow_global=nrow_C, ncol_global=ncol_C)
1902 :
1903 : ! Check matrix sizes
1904 1920 : CPASSERT(nrow_A == nrow_B .AND. ncol_A == ncol_C .AND. ncol_B == nrow_C)
1905 :
1906 1920 : CALL cp_fm_set_all(fm_work_ia, 0.0_dp)
1907 :
1908 : CALL parallel_gemm("N", "N", nrow_A, ncol_A, nrow_C, 1.0_dp, &
1909 1920 : fm_B, fm_C, 0.0_dp, fm_work_ia)
1910 :
1911 1920 : CALL cp_fm_trace(fm_A, fm_work_ia, alpha)
1912 :
1913 1920 : CALL cp_fm_release(fm_work_ia)
1914 :
1915 1920 : CALL timestop(handle)
1916 :
1917 1920 : END SUBROUTINE trace_exciton_descr
1918 :
1919 : END MODULE
|