Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Auxiliary routines for GW + Bethe-Salpeter for computing electronic excitations
10 : !> \par History
11 : !> 11.2023 created [Maximilian Graml]
12 : ! **************************************************************************************************
13 : MODULE bse_util
14 : USE atomic_kind_types, ONLY: atomic_kind_type
15 : USE cell_types, ONLY: cell_type
16 : USE cp_blacs_env, ONLY: cp_blacs_env_type
17 : USE cp_control_types, ONLY: dft_control_type
18 : USE cp_dbcsr_api, ONLY: dbcsr_create,&
19 : dbcsr_init_p,&
20 : dbcsr_p_type,&
21 : dbcsr_set,&
22 : dbcsr_type_symmetric
23 : USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_alloc_block_from_nbl
24 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_sm_fm_multiply,&
25 : dbcsr_allocate_matrix_set,&
26 : dbcsr_deallocate_matrix_set
27 : USE cp_fm_basic_linalg, ONLY: cp_fm_trace,&
28 : cp_fm_upper_to_full
29 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose,&
30 : cp_fm_cholesky_invert
31 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
32 : cp_fm_struct_release,&
33 : cp_fm_struct_type
34 : USE cp_fm_types, ONLY: cp_fm_create,&
35 : cp_fm_get_info,&
36 : cp_fm_release,&
37 : cp_fm_set_all,&
38 : cp_fm_to_fm_submat,&
39 : cp_fm_to_fm_submat_general,&
40 : cp_fm_type
41 : USE cp_log_handling, ONLY: cp_get_default_logger,&
42 : cp_logger_type
43 : USE cp_output_handling, ONLY: cp_print_key_finished_output,&
44 : cp_print_key_unit_nr
45 : USE cp_realspace_grid_cube, ONLY: cp_pw_to_cube
46 : USE input_constants, ONLY: use_mom_ref_coac
47 : USE input_section_types, ONLY: section_vals_type
48 : USE kinds, ONLY: default_path_length,&
49 : dp,&
50 : int_8
51 : USE message_passing, ONLY: mp_para_env_type,&
52 : mp_request_type
53 : USE moments_utils, ONLY: get_reference_point
54 : USE mp2_types, ONLY: integ_mat_buffer_type,&
55 : mp2_type
56 : USE parallel_gemm_api, ONLY: parallel_gemm
57 : USE particle_list_types, ONLY: particle_list_type
58 : USE particle_types, ONLY: particle_type
59 : USE physcon, ONLY: evolt
60 : USE pw_env_types, ONLY: pw_env_get,&
61 : pw_env_type
62 : USE pw_pool_types, ONLY: pw_pool_p_type,&
63 : pw_pool_type
64 : USE pw_types, ONLY: pw_c1d_gs_type,&
65 : pw_r3d_rs_type
66 : USE qs_collocate_density, ONLY: calculate_wavefunction
67 : USE qs_environment_types, ONLY: get_qs_env,&
68 : qs_environment_type
69 : USE qs_kind_types, ONLY: qs_kind_type
70 : USE qs_mo_types, ONLY: get_mo_set,&
71 : mo_set_type
72 : USE qs_moments, ONLY: build_local_moment_matrix
73 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type
74 : USE qs_subsys_types, ONLY: qs_subsys_get,&
75 : qs_subsys_type
76 : USE rpa_communication, ONLY: communicate_buffer
77 : USE util, ONLY: sort,&
78 : sort_unique
79 : #include "./base/base_uses.f90"
80 :
81 : IMPLICIT NONE
82 :
83 : PRIVATE
84 :
85 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'bse_util'
86 :
87 : PUBLIC :: mult_B_with_W, fm_general_add_bse, truncate_fm, &
88 : deallocate_matrices_bse, comp_eigvec_coeff_BSE, sort_excitations, &
89 : estimate_BSE_resources, filter_eigvec_contrib, truncate_BSE_matrices, &
90 : determine_cutoff_indices, adapt_BSE_input_params, get_multipoles_mo, &
91 : reshuffle_eigvec, print_bse_nto_cubes, trace_exciton_descr
92 :
93 : CONTAINS
94 :
95 : ! **************************************************************************************************
96 : !> \brief Multiplies B-matrix (RI-3c-Integrals) with W (screening) to obtain \bar{B}
97 : !> \param fm_mat_S_ij_bse ...
98 : !> \param fm_mat_S_ia_bse ...
99 : !> \param fm_mat_S_bar_ia_bse ...
100 : !> \param fm_mat_S_bar_ij_bse ...
101 : !> \param fm_mat_Q_static_bse_gemm ...
102 : !> \param dimen_RI ...
103 : !> \param homo ...
104 : !> \param virtual ...
105 : ! **************************************************************************************************
106 192 : SUBROUTINE mult_B_with_W(fm_mat_S_ij_bse, fm_mat_S_ia_bse, fm_mat_S_bar_ia_bse, &
107 : fm_mat_S_bar_ij_bse, fm_mat_Q_static_bse_gemm, &
108 : dimen_RI, homo, virtual)
109 :
110 : TYPE(cp_fm_type), INTENT(IN) :: fm_mat_S_ij_bse, fm_mat_S_ia_bse
111 : TYPE(cp_fm_type), INTENT(OUT) :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse
112 : TYPE(cp_fm_type), INTENT(IN) :: fm_mat_Q_static_bse_gemm
113 : INTEGER, INTENT(IN) :: dimen_RI, homo, virtual
114 :
115 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mult_B_with_W'
116 :
117 : INTEGER :: handle, i_global, iiB, info_chol, &
118 : j_global, jjB, ncol_local, nrow_local
119 32 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
120 : TYPE(cp_fm_type) :: fm_work
121 :
122 32 : CALL timeset(routineN, handle)
123 :
124 32 : CALL cp_fm_create(fm_mat_S_bar_ia_bse, fm_mat_S_ia_bse%matrix_struct)
125 32 : CALL cp_fm_set_all(fm_mat_S_bar_ia_bse, 0.0_dp)
126 :
127 32 : CALL cp_fm_create(fm_mat_S_bar_ij_bse, fm_mat_S_ij_bse%matrix_struct)
128 32 : CALL cp_fm_set_all(fm_mat_S_bar_ij_bse, 0.0_dp)
129 :
130 32 : CALL cp_fm_create(fm_work, fm_mat_Q_static_bse_gemm%matrix_struct)
131 32 : CALL cp_fm_set_all(fm_work, 0.0_dp)
132 :
133 : ! get info of fm_mat_Q_static_bse and compute ((1+Q(0))^-1-1)
134 : CALL cp_fm_get_info(matrix=fm_mat_Q_static_bse_gemm, &
135 : nrow_local=nrow_local, &
136 : ncol_local=ncol_local, &
137 : row_indices=row_indices, &
138 32 : col_indices=col_indices)
139 :
140 2688 : DO jjB = 1, ncol_local
141 2656 : j_global = col_indices(jjB)
142 112912 : DO iiB = 1, nrow_local
143 110224 : i_global = row_indices(iiB)
144 112880 : IF (j_global == i_global .AND. i_global <= dimen_RI) THEN
145 1328 : fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) = fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) + 1.0_dp
146 : END IF
147 : END DO
148 : END DO
149 :
150 : ! calculate Trace(Log(Matrix)) as Log(DET(Matrix)) via cholesky decomposition
151 32 : CALL cp_fm_cholesky_decompose(matrix=fm_mat_Q_static_bse_gemm, n=dimen_RI, info_out=info_chol)
152 :
153 32 : CPASSERT(info_chol == 0)
154 :
155 : ! calculate [1+Q(i0)]^-1
156 32 : CALL cp_fm_cholesky_invert(fm_mat_Q_static_bse_gemm)
157 :
158 : ! symmetrize the result
159 32 : CALL cp_fm_upper_to_full(fm_mat_Q_static_bse_gemm, fm_work)
160 :
161 : CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo**2, k=dimen_RI, alpha=1.0_dp, &
162 : matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ij_bse, beta=0.0_dp, &
163 32 : matrix_c=fm_mat_S_bar_ij_bse)
164 :
165 : ! fm_mat_S_bar_ia_bse has a different blacs_env as fm_mat_S_ij_bse since we take
166 : ! fm_mat_S_ia_bse from RPA. Therefore, we also need a different fm_mat_Q_static_bse_gemm
167 : CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo*virtual, k=dimen_RI, alpha=1.0_dp, &
168 : matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ia_bse, beta=0.0_dp, &
169 32 : matrix_c=fm_mat_S_bar_ia_bse)
170 :
171 32 : CALL cp_fm_release(fm_work)
172 :
173 32 : CALL timestop(handle)
174 :
175 32 : END SUBROUTINE
176 :
177 : ! **************************************************************************************************
178 : !> \brief Adds and reorders full matrices with a combined index structure, e.g. adding W_ij,ab
179 : !> to A_ia, jb which needs MPI communication.
180 : !> \param fm_out ...
181 : !> \param fm_in ...
182 : !> \param beta ...
183 : !> \param nrow_secidx_in ...
184 : !> \param ncol_secidx_in ...
185 : !> \param nrow_secidx_out ...
186 : !> \param ncol_secidx_out ...
187 : !> \param unit_nr ...
188 : !> \param reordering ...
189 : !> \param mp2_env ...
190 : ! **************************************************************************************************
191 444 : SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_in, &
192 : nrow_secidx_out, ncol_secidx_out, unit_nr, reordering, mp2_env)
193 :
194 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_out
195 : TYPE(cp_fm_type), INTENT(IN) :: fm_in
196 : REAL(kind=dp) :: beta
197 : INTEGER, INTENT(IN) :: nrow_secidx_in, ncol_secidx_in, &
198 : nrow_secidx_out, ncol_secidx_out
199 : INTEGER :: unit_nr
200 : INTEGER, DIMENSION(4) :: reordering
201 : TYPE(mp2_type), INTENT(IN) :: mp2_env
202 :
203 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fm_general_add_bse'
204 :
205 : INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_out, idx_row_out, ii, &
206 : iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, nprocs, &
207 : nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, row_idx_loc, &
208 : send_pcol, send_prow
209 444 : INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, num_entries_rec, &
210 : num_entries_send
211 : INTEGER, DIMENSION(4) :: indices_in
212 444 : INTEGER, DIMENSION(:), POINTER :: col_indices_in, col_indices_out, &
213 444 : row_indices_in, row_indices_out
214 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
215 444 : DIMENSION(:) :: buffer_rec, buffer_send
216 : TYPE(mp_para_env_type), POINTER :: para_env_out
217 444 : TYPE(mp_request_type), DIMENSION(:, :), POINTER :: req_array
218 :
219 444 : CALL timeset(routineN, handle)
220 444 : CALL timeset(routineN//"_1_setup", handle2)
221 :
222 444 : para_env_out => fm_out%matrix_struct%para_env
223 : ! A_iajb
224 : ! We start by moving data from local parts of W_ijab to the full matrix A_iajb using buffers
225 : CALL cp_fm_get_info(matrix=fm_out, &
226 : nrow_local=nrow_local_out, &
227 : ncol_local=ncol_local_out, &
228 : row_indices=row_indices_out, &
229 : col_indices=col_indices_out, &
230 : nrow_block=nrow_block_out, &
231 444 : ncol_block=ncol_block_out)
232 :
233 1332 : ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
234 1332 : ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
235 :
236 1332 : num_entries_rec(:) = 0
237 1332 : num_entries_send(:) = 0
238 :
239 444 : dummy = 0
240 :
241 : CALL cp_fm_get_info(matrix=fm_in, &
242 : nrow_local=nrow_local_in, &
243 : ncol_local=ncol_local_in, &
244 : row_indices=row_indices_in, &
245 : col_indices=col_indices_in, &
246 : nrow_block=nrow_block_in, &
247 444 : ncol_block=ncol_block_in)
248 :
249 444 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
250 84 : WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
251 168 : fm_out%matrix_struct%nrow_global
252 84 : WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
253 168 : fm_out%matrix_struct%ncol_global
254 :
255 84 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
256 84 : WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
257 :
258 84 : WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
259 168 : fm_in%matrix_struct%nrow_global
260 84 : WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
261 168 : fm_in%matrix_struct%ncol_global
262 :
263 84 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
264 84 : WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
265 : END IF
266 :
267 : ! Use scalapack wrapper to find process index in fm_out
268 : ! To that end, we obtain the global index in fm_out from the level indices
269 444 : indices_in(:) = 0
270 8860 : DO row_idx_loc = 1, nrow_local_in
271 8416 : indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
272 8416 : indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
273 73660 : DO col_idx_loc = 1, ncol_local_in
274 64800 : indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
275 64800 : indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
276 :
277 64800 : idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
278 64800 : idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
279 :
280 64800 : send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
281 64800 : send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
282 :
283 64800 : proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
284 :
285 73216 : num_entries_send(proc_send) = num_entries_send(proc_send) + 1
286 :
287 : END DO
288 : END DO
289 :
290 444 : CALL timestop(handle2)
291 :
292 444 : CALL timeset(routineN//"_2_comm_entry_nums", handle2)
293 444 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
294 84 : WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
295 : END IF
296 :
297 444 : CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
298 :
299 444 : CALL timestop(handle2)
300 :
301 444 : CALL timeset(routineN//"_3_alloc_buffer", handle2)
302 444 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
303 84 : WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
304 : END IF
305 :
306 : ! Buffers for entries and their indices
307 2220 : ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
308 2220 : ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
309 :
310 : ! allocate data message and corresponding indices
311 1332 : DO iproc = 0, para_env_out%num_pe - 1
312 :
313 2270 : ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
314 66132 : buffer_rec(iproc)%msg = 0.0_dp
315 :
316 : END DO
317 :
318 1332 : DO iproc = 0, para_env_out%num_pe - 1
319 :
320 2270 : ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
321 66132 : buffer_send(iproc)%msg = 0.0_dp
322 :
323 : END DO
324 :
325 1332 : DO iproc = 0, para_env_out%num_pe - 1
326 :
327 2270 : ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
328 132708 : buffer_rec(iproc)%indx = 0
329 :
330 : END DO
331 :
332 1332 : DO iproc = 0, para_env_out%num_pe - 1
333 :
334 2270 : ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
335 132708 : buffer_send(iproc)%indx = 0
336 :
337 : END DO
338 :
339 444 : CALL timestop(handle2)
340 :
341 444 : CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
342 444 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
343 84 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
344 : END IF
345 :
346 1332 : ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
347 1332 : entry_counter(:) = 0
348 :
349 : ! Now we can write the actual data and indices to the send-buffer
350 8860 : DO row_idx_loc = 1, nrow_local_in
351 8416 : indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
352 8416 : indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
353 73660 : DO col_idx_loc = 1, ncol_local_in
354 64800 : indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
355 64800 : indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
356 :
357 64800 : idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
358 64800 : idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
359 :
360 64800 : send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
361 64800 : send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
362 :
363 64800 : proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
364 64800 : entry_counter(proc_send) = entry_counter(proc_send) + 1
365 :
366 : buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
367 64800 : fm_in%local_data(row_idx_loc, col_idx_loc)
368 :
369 64800 : buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_out
370 73216 : buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
371 :
372 : END DO
373 : END DO
374 :
375 6660 : ALLOCATE (req_array(1:para_env_out%num_pe, 4))
376 :
377 444 : CALL timestop(handle2)
378 :
379 444 : CALL timeset(routineN//"_5_comm_buffer", handle2)
380 444 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
381 84 : WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
382 : END IF
383 :
384 : ! communicate the buffer
385 : CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
386 444 : buffer_send, req_array)
387 :
388 444 : CALL timestop(handle2)
389 :
390 444 : CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
391 444 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
392 84 : WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
393 : END IF
394 :
395 : ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
396 444 : nprocs = para_env_out%num_pe
397 :
398 : !$OMP PARALLEL DO DEFAULT(NONE) &
399 : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec, beta) &
400 444 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
401 : DO iproc = 0, nprocs - 1
402 : DO i_entry_rec = 1, num_entries_rec(iproc)
403 : ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
404 : jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
405 :
406 : fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + beta*buffer_rec(iproc)%msg(i_entry_rec)
407 : END DO
408 : END DO
409 : !$OMP END PARALLEL DO
410 :
411 444 : CALL timestop(handle2)
412 :
413 444 : CALL timeset(routineN//"_7_cleanup", handle2)
414 444 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
415 84 : WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
416 : END IF
417 :
418 : !Clean up all the arrays from the communication process
419 1332 : DO iproc = 0, para_env_out%num_pe - 1
420 888 : DEALLOCATE (buffer_rec(iproc)%msg)
421 888 : DEALLOCATE (buffer_rec(iproc)%indx)
422 888 : DEALLOCATE (buffer_send(iproc)%msg)
423 1332 : DEALLOCATE (buffer_send(iproc)%indx)
424 : END DO
425 2220 : DEALLOCATE (buffer_rec, buffer_send)
426 444 : DEALLOCATE (req_array)
427 444 : DEALLOCATE (entry_counter)
428 444 : DEALLOCATE (num_entries_rec, num_entries_send)
429 :
430 444 : CALL timestop(handle2)
431 444 : CALL timestop(handle)
432 :
433 3108 : END SUBROUTINE fm_general_add_bse
434 :
435 : ! **************************************************************************************************
436 : !> \brief Routine for truncating a full matrix as given by the energy cutoffs in the input file.
437 : !> Logic: Matrices have some dimension dimen_RI x nrow_in*ncol_in for the incoming (untruncated) matrix
438 : !> and dimen_RI x nrow_out*ncol_out for the truncated matrix. The truncation is done by resorting the indices
439 : !> via parallel communication.
440 : !> \param fm_out ...
441 : !> \param fm_in ...
442 : !> \param ncol_in ...
443 : !> \param nrow_out ...
444 : !> \param ncol_out ...
445 : !> \param unit_nr ...
446 : !> \param mp2_env ...
447 : !> \param nrow_offset ...
448 : !> \param ncol_offset ...
449 : ! **************************************************************************************************
450 96 : SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, &
451 : nrow_out, ncol_out, unit_nr, mp2_env, &
452 : nrow_offset, ncol_offset)
453 :
454 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_out
455 : TYPE(cp_fm_type), INTENT(IN) :: fm_in
456 : INTEGER :: ncol_in, nrow_out, ncol_out, unit_nr
457 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
458 : INTEGER, INTENT(IN), OPTIONAL :: nrow_offset, ncol_offset
459 :
460 : CHARACTER(LEN=*), PARAMETER :: routineN = 'truncate_fm'
461 :
462 : INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_first, idx_col_in, &
463 : idx_col_out, idx_col_sec, idx_row_in, ii, iproc, jj, ncol_block_in, ncol_block_out, &
464 : ncol_local_in, ncol_local_out, nprocs, nrow_block_in, nrow_block_out, nrow_local_in, &
465 : nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow
466 96 : INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, num_entries_rec, &
467 : num_entries_send
468 96 : INTEGER, DIMENSION(:), POINTER :: col_indices_in, col_indices_out, &
469 96 : row_indices_in, row_indices_out
470 : LOGICAL :: correct_ncol, correct_nrow
471 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
472 96 : DIMENSION(:) :: buffer_rec, buffer_send
473 : TYPE(mp_para_env_type), POINTER :: para_env_out
474 96 : TYPE(mp_request_type), DIMENSION(:, :), POINTER :: req_array
475 :
476 96 : CALL timeset(routineN, handle)
477 96 : CALL timeset(routineN//"_1_setup", handle2)
478 :
479 96 : correct_nrow = .FALSE.
480 96 : correct_ncol = .FALSE.
481 : !In case of truncation in the occupied space, we need to correct the interval of indices
482 96 : IF (PRESENT(nrow_offset)) THEN
483 64 : correct_nrow = .TRUE.
484 : END IF
485 96 : IF (PRESENT(ncol_offset)) THEN
486 32 : correct_ncol = .TRUE.
487 : END IF
488 :
489 96 : para_env_out => fm_out%matrix_struct%para_env
490 :
491 : CALL cp_fm_get_info(matrix=fm_out, &
492 : nrow_local=nrow_local_out, &
493 : ncol_local=ncol_local_out, &
494 : row_indices=row_indices_out, &
495 : col_indices=col_indices_out, &
496 : nrow_block=nrow_block_out, &
497 96 : ncol_block=ncol_block_out)
498 :
499 288 : ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
500 288 : ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
501 :
502 288 : num_entries_rec(:) = 0
503 288 : num_entries_send(:) = 0
504 :
505 96 : dummy = 0
506 :
507 : CALL cp_fm_get_info(matrix=fm_in, &
508 : nrow_local=nrow_local_in, &
509 : ncol_local=ncol_local_in, &
510 : row_indices=row_indices_in, &
511 : col_indices=col_indices_in, &
512 : nrow_block=nrow_block_in, &
513 96 : ncol_block=ncol_block_in)
514 :
515 96 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
516 6 : WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
517 12 : fm_out%matrix_struct%nrow_global
518 6 : WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
519 12 : fm_out%matrix_struct%ncol_global
520 :
521 6 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
522 6 : WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
523 :
524 6 : WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
525 12 : fm_in%matrix_struct%nrow_global
526 6 : WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
527 12 : fm_in%matrix_struct%ncol_global
528 :
529 6 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
530 6 : WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
531 : END IF
532 :
533 : ! We find global indices in S with nrow_in and ncol_in for truncation
534 7648 : DO col_idx_loc = 1, ncol_local_in
535 7552 : idx_col_in = col_indices_in(col_idx_loc)
536 :
537 7552 : idx_col_first = (idx_col_in - 1)/ncol_in + 1
538 7552 : idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
539 :
540 : ! If occupied orbitals are included, these have to be handled differently
541 : ! due to their reversed indexing
542 7552 : IF (correct_nrow) THEN
543 2944 : idx_col_first = idx_col_first - nrow_offset + 1
544 2944 : IF (idx_col_first .LE. 0) CYCLE
545 : ELSE
546 4608 : IF (idx_col_first > nrow_out) EXIT
547 : END IF
548 7552 : IF (correct_ncol) THEN
549 512 : idx_col_sec = idx_col_sec - ncol_offset + 1
550 512 : IF (idx_col_sec .LE. 0) CYCLE
551 : ELSE
552 7040 : IF (idx_col_sec > ncol_out) CYCLE
553 : END IF
554 :
555 6656 : idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
556 :
557 282976 : DO row_idx_loc = 1, nrow_local_in
558 276224 : idx_row_in = row_indices_in(row_idx_loc)
559 :
560 276224 : send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
561 276224 : send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
562 :
563 276224 : proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
564 :
565 283776 : num_entries_send(proc_send) = num_entries_send(proc_send) + 1
566 :
567 : END DO
568 : END DO
569 :
570 96 : CALL timestop(handle2)
571 :
572 96 : CALL timeset(routineN//"_2_comm_entry_nums", handle2)
573 96 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
574 6 : WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
575 : END IF
576 :
577 96 : CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
578 :
579 96 : CALL timestop(handle2)
580 :
581 96 : CALL timeset(routineN//"_3_alloc_buffer", handle2)
582 96 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
583 6 : WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
584 : END IF
585 :
586 : ! Buffers for entries and their indices
587 480 : ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
588 480 : ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
589 :
590 : ! allocate data message and corresponding indices
591 288 : DO iproc = 0, para_env_out%num_pe - 1
592 :
593 512 : ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
594 276512 : buffer_rec(iproc)%msg = 0.0_dp
595 :
596 : END DO
597 :
598 288 : DO iproc = 0, para_env_out%num_pe - 1
599 :
600 512 : ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
601 276512 : buffer_send(iproc)%msg = 0.0_dp
602 :
603 : END DO
604 :
605 288 : DO iproc = 0, para_env_out%num_pe - 1
606 :
607 512 : ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
608 553120 : buffer_rec(iproc)%indx = 0
609 :
610 : END DO
611 :
612 288 : DO iproc = 0, para_env_out%num_pe - 1
613 :
614 512 : ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
615 553120 : buffer_send(iproc)%indx = 0
616 :
617 : END DO
618 :
619 96 : CALL timestop(handle2)
620 :
621 96 : CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
622 96 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
623 6 : WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
624 : END IF
625 :
626 288 : ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
627 288 : entry_counter(:) = 0
628 :
629 : ! Now we can write the actual data and indices to the send-buffer
630 7648 : DO col_idx_loc = 1, ncol_local_in
631 7552 : idx_col_in = col_indices_in(col_idx_loc)
632 :
633 7552 : idx_col_first = (idx_col_in - 1)/ncol_in + 1
634 7552 : idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
635 :
636 : ! If occupied orbitals are included, these have to be handled differently
637 : ! due to their reversed indexing
638 7552 : IF (correct_nrow) THEN
639 2944 : idx_col_first = idx_col_first - nrow_offset + 1
640 2944 : IF (idx_col_first .LE. 0) CYCLE
641 : ELSE
642 4608 : IF (idx_col_first > nrow_out) EXIT
643 : END IF
644 7552 : IF (correct_ncol) THEN
645 512 : idx_col_sec = idx_col_sec - ncol_offset + 1
646 512 : IF (idx_col_sec .LE. 0) CYCLE
647 : ELSE
648 7040 : IF (idx_col_sec > ncol_out) CYCLE
649 : END IF
650 :
651 6656 : idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
652 :
653 282976 : DO row_idx_loc = 1, nrow_local_in
654 276224 : idx_row_in = row_indices_in(row_idx_loc)
655 :
656 276224 : send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
657 :
658 276224 : send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
659 :
660 276224 : proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
661 276224 : entry_counter(proc_send) = entry_counter(proc_send) + 1
662 :
663 : buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
664 276224 : fm_in%local_data(row_idx_loc, col_idx_loc)
665 : !No need to create row_out, since it is identical to incoming
666 : !We dont change the RI index for any fm_mat_XX_BSE
667 276224 : buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_in
668 283776 : buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
669 :
670 : END DO
671 : END DO
672 :
673 1440 : ALLOCATE (req_array(1:para_env_out%num_pe, 4))
674 :
675 96 : CALL timestop(handle2)
676 :
677 96 : CALL timeset(routineN//"_5_comm_buffer", handle2)
678 96 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
679 6 : WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
680 : END IF
681 :
682 : ! communicate the buffer
683 : CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
684 96 : buffer_send, req_array)
685 :
686 96 : CALL timestop(handle2)
687 :
688 96 : CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
689 96 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
690 6 : WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
691 : END IF
692 :
693 : ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
694 96 : nprocs = para_env_out%num_pe
695 :
696 : !$OMP PARALLEL DO DEFAULT(NONE) &
697 : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec) &
698 96 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
699 : DO iproc = 0, nprocs - 1
700 : DO i_entry_rec = 1, num_entries_rec(iproc)
701 : ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
702 : jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
703 :
704 : fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + buffer_rec(iproc)%msg(i_entry_rec)
705 : END DO
706 : END DO
707 : !$OMP END PARALLEL DO
708 :
709 96 : CALL timestop(handle2)
710 :
711 96 : CALL timeset(routineN//"_7_cleanup", handle2)
712 96 : IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
713 6 : WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
714 : END IF
715 :
716 : !Clean up all the arrays from the communication process
717 288 : DO iproc = 0, para_env_out%num_pe - 1
718 192 : DEALLOCATE (buffer_rec(iproc)%msg)
719 192 : DEALLOCATE (buffer_rec(iproc)%indx)
720 192 : DEALLOCATE (buffer_send(iproc)%msg)
721 288 : DEALLOCATE (buffer_send(iproc)%indx)
722 : END DO
723 480 : DEALLOCATE (buffer_rec, buffer_send)
724 96 : DEALLOCATE (req_array)
725 96 : DEALLOCATE (entry_counter)
726 96 : DEALLOCATE (num_entries_rec, num_entries_send)
727 :
728 96 : CALL timestop(handle2)
729 96 : CALL timestop(handle)
730 :
731 768 : END SUBROUTINE truncate_fm
732 :
733 : ! **************************************************************************************************
734 : !> \brief ...
735 : !> \param fm_mat_S_bar_ia_bse ...
736 : !> \param fm_mat_S_bar_ij_bse ...
737 : !> \param fm_mat_S_trunc ...
738 : !> \param fm_mat_S_ij_trunc ...
739 : !> \param fm_mat_S_ab_trunc ...
740 : !> \param fm_mat_Q_static_bse_gemm ...
741 : !> \param mp2_env ...
742 : ! **************************************************************************************************
743 32 : SUBROUTINE deallocate_matrices_bse(fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, &
744 : fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
745 : fm_mat_Q_static_bse_gemm, mp2_env)
746 :
747 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, fm_mat_S_trunc, &
748 : fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, fm_mat_Q_static_bse_gemm
749 : TYPE(mp2_type) :: mp2_env
750 :
751 : CHARACTER(LEN=*), PARAMETER :: routineN = 'deallocate_matrices_bse'
752 :
753 : INTEGER :: handle
754 :
755 32 : CALL timeset(routineN, handle)
756 :
757 32 : CALL cp_fm_release(fm_mat_S_bar_ia_bse)
758 32 : CALL cp_fm_release(fm_mat_S_bar_ij_bse)
759 32 : CALL cp_fm_release(fm_mat_S_trunc)
760 32 : CALL cp_fm_release(fm_mat_S_ij_trunc)
761 32 : CALL cp_fm_release(fm_mat_S_ab_trunc)
762 32 : CALL cp_fm_release(fm_mat_Q_static_bse_gemm)
763 32 : IF (mp2_env%bse%do_nto_analysis) THEN
764 4 : DEALLOCATE (mp2_env%bse%bse_nto_state_list_final)
765 : END IF
766 :
767 32 : CALL timestop(handle)
768 :
769 32 : END SUBROUTINE deallocate_matrices_bse
770 :
771 : ! **************************************************************************************************
772 : !> \brief Routine for computing the coefficients of the eigenvectors of the BSE matrix from a
773 : !> multiplication with the eigenvalues
774 : !> \param fm_work ...
775 : !> \param eig_vals ...
776 : !> \param beta ...
777 : !> \param gamma ...
778 : !> \param do_transpose ...
779 : ! **************************************************************************************************
780 64 : SUBROUTINE comp_eigvec_coeff_BSE(fm_work, eig_vals, beta, gamma, do_transpose)
781 :
782 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_work
783 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
784 : INTENT(IN) :: eig_vals
785 : REAL(KIND=dp), INTENT(IN) :: beta
786 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: gamma
787 : LOGICAL, INTENT(IN), OPTIONAL :: do_transpose
788 :
789 : CHARACTER(LEN=*), PARAMETER :: routineN = 'comp_eigvec_coeff_BSE'
790 :
791 : INTEGER :: handle, i_row_global, ii, j_col_global, &
792 : jj, ncol_local, nrow_local
793 32 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
794 : LOGICAL :: my_do_transpose
795 : REAL(KIND=dp) :: coeff, my_gamma
796 :
797 32 : CALL timeset(routineN, handle)
798 :
799 32 : IF (PRESENT(gamma)) THEN
800 32 : my_gamma = gamma
801 : ELSE
802 : my_gamma = 2.0_dp
803 : END IF
804 :
805 32 : IF (PRESENT(do_transpose)) THEN
806 32 : my_do_transpose = do_transpose
807 : ELSE
808 : my_do_transpose = .FALSE.
809 : END IF
810 :
811 : CALL cp_fm_get_info(matrix=fm_work, &
812 : nrow_local=nrow_local, &
813 : ncol_local=ncol_local, &
814 : row_indices=row_indices, &
815 32 : col_indices=col_indices)
816 :
817 32 : IF (my_do_transpose) THEN
818 1568 : DO jj = 1, ncol_local
819 1536 : j_col_global = col_indices(jj)
820 38432 : DO ii = 1, nrow_local
821 36864 : coeff = (eig_vals(j_col_global)**beta)/my_gamma
822 38400 : fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
823 : END DO
824 : END DO
825 : ELSE
826 0 : DO jj = 1, ncol_local
827 0 : DO ii = 1, nrow_local
828 0 : i_row_global = row_indices(ii)
829 0 : coeff = (eig_vals(i_row_global)**beta)/my_gamma
830 0 : fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
831 : END DO
832 : END DO
833 : END IF
834 :
835 32 : CALL timestop(handle)
836 :
837 32 : END SUBROUTINE
838 :
839 : ! **************************************************************************************************
840 : !> \brief ...
841 : !> \param idx_prim ...
842 : !> \param idx_sec ...
843 : !> \param eigvec_entries ...
844 : ! **************************************************************************************************
845 1200 : SUBROUTINE sort_excitations(idx_prim, idx_sec, eigvec_entries)
846 :
847 : INTEGER, ALLOCATABLE, DIMENSION(:) :: idx_prim, idx_sec
848 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvec_entries
849 :
850 : CHARACTER(LEN=*), PARAMETER :: routineN = 'sort_excitations'
851 :
852 : INTEGER :: handle, ii, kk, num_entries, num_mults
853 1200 : INTEGER, ALLOCATABLE, DIMENSION(:) :: idx_prim_work, idx_sec_work, tmp_index
854 : LOGICAL :: unique_entries
855 1200 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvec_entries_work
856 :
857 1200 : CALL timeset(routineN, handle)
858 :
859 1200 : num_entries = SIZE(idx_prim)
860 :
861 3200 : ALLOCATE (tmp_index(num_entries))
862 :
863 1200 : CALL sort(idx_prim, num_entries, tmp_index)
864 :
865 2000 : ALLOCATE (idx_sec_work(num_entries))
866 3200 : ALLOCATE (eigvec_entries_work(num_entries))
867 :
868 3104 : DO ii = 1, num_entries
869 1904 : idx_sec_work(ii) = idx_sec(tmp_index(ii))
870 3104 : eigvec_entries_work(ii) = eigvec_entries(tmp_index(ii))
871 : END DO
872 :
873 1200 : DEALLOCATE (tmp_index)
874 1200 : DEALLOCATE (idx_sec)
875 1200 : DEALLOCATE (eigvec_entries)
876 :
877 1200 : CALL MOVE_ALLOC(idx_sec_work, idx_sec)
878 1200 : CALL MOVE_ALLOC(eigvec_entries_work, eigvec_entries)
879 :
880 : !Now check for multiple entries in first idx to check necessity of sorting in second idx
881 1200 : CALL sort_unique(idx_prim, unique_entries)
882 1200 : IF (.NOT. unique_entries) THEN
883 352 : ALLOCATE (idx_prim_work(num_entries))
884 1008 : idx_prim_work(:) = idx_prim(:)
885 : ! Find duplicate entries in idx_prim
886 1008 : DO ii = 1, num_entries
887 832 : IF (idx_prim_work(ii) == 0) CYCLE
888 3436 : num_mults = COUNT(idx_prim_work == idx_prim_work(ii))
889 572 : IF (num_mults > 1) THEN
890 : !Set all duplicate entries to 0
891 684 : idx_prim_work(ii:ii + num_mults - 1) = 0
892 : !Start sorting in secondary index
893 636 : ALLOCATE (idx_sec_work(num_mults))
894 636 : ALLOCATE (eigvec_entries_work(num_mults))
895 684 : idx_sec_work(:) = idx_sec(ii:ii + num_mults - 1)
896 684 : eigvec_entries_work(:) = eigvec_entries(ii:ii + num_mults - 1)
897 424 : ALLOCATE (tmp_index(num_mults))
898 212 : CALL sort(idx_sec_work, num_mults, tmp_index)
899 :
900 : !Now write newly sorted indices to original arrays
901 684 : DO kk = ii, ii + num_mults - 1
902 472 : idx_sec(kk) = idx_sec_work(kk - ii + 1)
903 684 : eigvec_entries(kk) = eigvec_entries_work(tmp_index(kk - ii + 1))
904 : END DO
905 : !Deallocate work arrays
906 212 : DEALLOCATE (tmp_index)
907 212 : DEALLOCATE (idx_sec_work)
908 212 : DEALLOCATE (eigvec_entries_work)
909 : END IF
910 1008 : idx_prim_work(ii) = idx_prim(ii)
911 : END DO
912 176 : DEALLOCATE (idx_prim_work)
913 : END IF
914 :
915 1200 : CALL timestop(handle)
916 :
917 3600 : END SUBROUTINE sort_excitations
918 :
919 : ! **************************************************************************************************
920 : !> \brief Roughly estimates the needed runtime and memory during the BSE run
921 : !> \param homo_red ...
922 : !> \param virtual_red ...
923 : !> \param unit_nr ...
924 : !> \param bse_abba ...
925 : !> \param para_env ...
926 : !> \param diag_runtime_est ...
927 : ! **************************************************************************************************
928 32 : SUBROUTINE estimate_BSE_resources(homo_red, virtual_red, unit_nr, bse_abba, &
929 : para_env, diag_runtime_est)
930 :
931 : INTEGER :: homo_red, virtual_red, unit_nr
932 : LOGICAL :: bse_abba
933 : TYPE(mp_para_env_type), POINTER :: para_env
934 : REAL(KIND=dp) :: diag_runtime_est
935 :
936 : CHARACTER(LEN=*), PARAMETER :: routineN = 'estimate_BSE_resources'
937 :
938 : INTEGER :: handle, num_BSE_matrices
939 : INTEGER(KIND=int_8) :: full_dim
940 : REAL(KIND=dp) :: mem_est, mem_est_per_rank
941 :
942 32 : CALL timeset(routineN, handle)
943 :
944 : ! Number of matrices with size of A in TDA is 2 (A itself and W_ijab)
945 32 : num_BSE_matrices = 2
946 : ! With the full diagonalization of ABBA, we need several auxiliary matrices in the process
947 : ! The maximum number is 2 + 2 + 6 (additional B and C matrix as well as 6 matrices to create C)
948 32 : IF (bse_abba) THEN
949 16 : num_BSE_matrices = 10
950 : END IF
951 :
952 32 : full_dim = (INT(homo_red, KIND=int_8)**2*INT(virtual_red, KIND=int_8)**2)*INT(num_BSE_matrices, KIND=int_8)
953 32 : mem_est = REAL(8*full_dim, KIND=dp)/REAL(1024**3, KIND=dp)
954 32 : mem_est_per_rank = REAL(mem_est/para_env%num_pe, KIND=dp)
955 :
956 32 : IF (unit_nr > 0) THEN
957 : ! WRITE (unit_nr, '(T2,A4,T7,A40,T68,F13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
958 : ! mem_est
959 16 : WRITE (unit_nr, '(T2,A4,T7,A40,T68,ES13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
960 32 : mem_est
961 16 : WRITE (unit_nr, '(T2,A4,T7,A47,T68,F13.3)') 'BSE|', 'Peak memory estimate per MPI rank from BSE [GB]', &
962 32 : mem_est_per_rank
963 16 : WRITE (unit_nr, '(T2,A4)') 'BSE|'
964 : END IF
965 : ! Rough estimation of diagonalization runtimes. Baseline was a full BSE Naphthalene
966 : ! run with 11000x11000 entries in A/B/C, which took 10s on 32 ranks
967 : diag_runtime_est = REAL(INT(homo_red, KIND=int_8)*INT(virtual_red, KIND=int_8)/11000_int_8, KIND=dp)**3* &
968 32 : 10*32/REAL(para_env%num_pe, KIND=dp)
969 :
970 32 : CALL timestop(handle)
971 :
972 32 : END SUBROUTINE estimate_BSE_resources
973 :
974 : ! **************************************************************************************************
975 : !> \brief Filters eigenvector entries above a given threshold to describe excitations in the
976 : !> singleparticle basis
977 : !> \param fm_eigvec ...
978 : !> \param idx_homo ...
979 : !> \param idx_virt ...
980 : !> \param eigvec_entries ...
981 : !> \param i_exc ...
982 : !> \param virtual ...
983 : !> \param num_entries ...
984 : !> \param mp2_env ...
985 : ! **************************************************************************************************
986 1200 : SUBROUTINE filter_eigvec_contrib(fm_eigvec, idx_homo, idx_virt, eigvec_entries, &
987 : i_exc, virtual, num_entries, mp2_env)
988 :
989 : TYPE(cp_fm_type), INTENT(IN) :: fm_eigvec
990 : INTEGER, ALLOCATABLE, DIMENSION(:) :: idx_homo, idx_virt
991 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvec_entries
992 : INTEGER :: i_exc, virtual, num_entries
993 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
994 :
995 : CHARACTER(LEN=*), PARAMETER :: routineN = 'filter_eigvec_contrib'
996 :
997 : INTEGER :: eigvec_idx, handle, ii, iproc, jj, kk, &
998 : ncol_local, nrow_local, &
999 : num_entries_local
1000 : INTEGER, ALLOCATABLE, DIMENSION(:) :: num_entries_to_comm
1001 1200 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1002 : REAL(KIND=dp) :: eigvec_entry
1003 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
1004 1200 : DIMENSION(:) :: buffer_entries
1005 : TYPE(mp_para_env_type), POINTER :: para_env
1006 :
1007 1200 : CALL timeset(routineN, handle)
1008 :
1009 1200 : para_env => fm_eigvec%matrix_struct%para_env
1010 :
1011 : CALL cp_fm_get_info(matrix=fm_eigvec, &
1012 : nrow_local=nrow_local, &
1013 : ncol_local=ncol_local, &
1014 : row_indices=row_indices, &
1015 1200 : col_indices=col_indices)
1016 :
1017 3600 : ALLOCATE (num_entries_to_comm(0:para_env%num_pe - 1))
1018 3600 : num_entries_to_comm(:) = 0
1019 :
1020 58800 : DO jj = 1, ncol_local
1021 : !First check if i is localized on this proc
1022 57600 : IF (col_indices(jj) /= i_exc) THEN
1023 : CYCLE
1024 : END IF
1025 31200 : DO ii = 1, nrow_local
1026 28800 : eigvec_idx = row_indices(ii)
1027 28800 : eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
1028 86400 : IF (ABS(eigvec_entry) > mp2_env%bse%eps_x) THEN
1029 952 : num_entries_to_comm(para_env%mepos) = num_entries_to_comm(para_env%mepos) + 1
1030 : END IF
1031 : END DO
1032 : END DO
1033 :
1034 : !Gather number of entries of other processes
1035 1200 : CALL para_env%sum(num_entries_to_comm)
1036 :
1037 1200 : num_entries_local = num_entries_to_comm(para_env%mepos)
1038 :
1039 6000 : ALLOCATE (buffer_entries(0:para_env%num_pe - 1))
1040 :
1041 3600 : DO iproc = 0, para_env%num_pe - 1
1042 6060 : ALLOCATE (buffer_entries(iproc)%msg(num_entries_to_comm(iproc)))
1043 6060 : ALLOCATE (buffer_entries(iproc)%indx(num_entries_to_comm(iproc), 2))
1044 4304 : buffer_entries(iproc)%msg = 0.0_dp
1045 12208 : buffer_entries(iproc)%indx = 0
1046 : END DO
1047 :
1048 : kk = 1
1049 58800 : DO jj = 1, ncol_local
1050 : !First check if i is localized on this proc
1051 57600 : IF (col_indices(jj) /= i_exc) THEN
1052 : CYCLE
1053 : END IF
1054 31200 : DO ii = 1, nrow_local
1055 28800 : eigvec_idx = row_indices(ii)
1056 28800 : eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
1057 86400 : IF (ABS(eigvec_entry) > mp2_env%bse%eps_x) THEN
1058 952 : buffer_entries(para_env%mepos)%indx(kk, 1) = (eigvec_idx - 1)/virtual + 1
1059 952 : buffer_entries(para_env%mepos)%indx(kk, 2) = MOD(eigvec_idx - 1, virtual) + 1
1060 952 : buffer_entries(para_env%mepos)%msg(kk) = eigvec_entry
1061 952 : kk = kk + 1
1062 : END IF
1063 : END DO
1064 : END DO
1065 :
1066 3600 : DO iproc = 0, para_env%num_pe - 1
1067 2400 : CALL para_env%sum(buffer_entries(iproc)%msg)
1068 3600 : CALL para_env%sum(buffer_entries(iproc)%indx)
1069 : END DO
1070 :
1071 : !Now sum up gathered information
1072 3600 : num_entries = SUM(num_entries_to_comm)
1073 3200 : ALLOCATE (idx_homo(num_entries))
1074 2000 : ALLOCATE (idx_virt(num_entries))
1075 3200 : ALLOCATE (eigvec_entries(num_entries))
1076 :
1077 1200 : kk = 1
1078 3600 : DO iproc = 0, para_env%num_pe - 1
1079 3600 : IF (num_entries_to_comm(iproc) /= 0) THEN
1080 3164 : DO ii = 1, num_entries_to_comm(iproc)
1081 1904 : idx_homo(kk) = buffer_entries(iproc)%indx(ii, 1)
1082 1904 : idx_virt(kk) = buffer_entries(iproc)%indx(ii, 2)
1083 1904 : eigvec_entries(kk) = buffer_entries(iproc)%msg(ii)
1084 3164 : kk = kk + 1
1085 : END DO
1086 : END IF
1087 : END DO
1088 :
1089 : !Deallocate all the used arrays
1090 3600 : DO iproc = 0, para_env%num_pe - 1
1091 2400 : DEALLOCATE (buffer_entries(iproc)%msg)
1092 3600 : DEALLOCATE (buffer_entries(iproc)%indx)
1093 : END DO
1094 4800 : DEALLOCATE (buffer_entries)
1095 1200 : DEALLOCATE (num_entries_to_comm)
1096 1200 : NULLIFY (row_indices)
1097 1200 : NULLIFY (col_indices)
1098 :
1099 : !Now sort the results according to the involved singleparticle orbitals
1100 : ! (homo first, then virtual)
1101 1200 : CALL sort_excitations(idx_homo, idx_virt, eigvec_entries)
1102 :
1103 1200 : CALL timestop(handle)
1104 :
1105 1200 : END SUBROUTINE
1106 :
1107 : ! **************************************************************************************************
1108 : !> \brief Reads cutoffs for BSE from mp2_env and compares to energies in Eigenval to extract
1109 : !> reduced homo/virtual and
1110 : !> \param Eigenval array (1d) with energies, can be e.g. from GW or DFT
1111 : !> \param homo Total number of occupied orbitals
1112 : !> \param virtual Total number of unoccupied orbitals
1113 : !> \param homo_red Total number of occupied orbitals to include after cutoff
1114 : !> \param virt_red Total number of unoccupied orbitals to include after ctuoff
1115 : !> \param homo_incl First occupied index to include after cutoff
1116 : !> \param virt_incl Last unoccupied index to include after cutoff
1117 : !> \param mp2_env ...
1118 : ! **************************************************************************************************
1119 64 : SUBROUTINE determine_cutoff_indices(Eigenval, &
1120 : homo, virtual, &
1121 : homo_red, virt_red, &
1122 : homo_incl, virt_incl, &
1123 : mp2_env)
1124 :
1125 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval
1126 : INTEGER, INTENT(IN) :: homo, virtual
1127 : INTEGER, INTENT(OUT) :: homo_red, virt_red, homo_incl, virt_incl
1128 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1129 :
1130 : CHARACTER(LEN=*), PARAMETER :: routineN = 'determine_cutoff_indices'
1131 :
1132 : INTEGER :: handle, i_homo, j_virt
1133 :
1134 64 : CALL timeset(routineN, handle)
1135 : ! Determine index in homo and virtual for truncation
1136 : ! Uses indices of outermost orbitals within energy range (-mp2_env%bse%bse_cutoff_occ,mp2_env%bse%bse_cutoff_empty)
1137 64 : IF (mp2_env%bse%bse_cutoff_occ > 0 .OR. mp2_env%bse%bse_cutoff_empty > 0) THEN
1138 : IF (-mp2_env%bse%bse_cutoff_occ .LT. Eigenval(1) - Eigenval(homo) &
1139 64 : .OR. mp2_env%bse%bse_cutoff_occ < 0) THEN
1140 64 : homo_red = homo
1141 64 : homo_incl = 1
1142 : ELSE
1143 0 : homo_incl = 1
1144 0 : DO i_homo = 1, homo
1145 0 : IF (Eigenval(i_homo) - Eigenval(homo) .GT. -mp2_env%bse%bse_cutoff_occ) THEN
1146 0 : homo_incl = i_homo
1147 0 : EXIT
1148 : END IF
1149 : END DO
1150 0 : homo_red = homo - homo_incl + 1
1151 : END IF
1152 :
1153 : IF (mp2_env%bse%bse_cutoff_empty .GT. Eigenval(homo + virtual) - Eigenval(homo + 1) &
1154 64 : .OR. mp2_env%bse%bse_cutoff_empty < 0) THEN
1155 0 : virt_red = virtual
1156 0 : virt_incl = virtual
1157 : ELSE
1158 64 : virt_incl = homo + 1
1159 832 : DO j_virt = 1, virtual
1160 832 : IF (Eigenval(homo + j_virt) - Eigenval(homo + 1) .GT. mp2_env%bse%bse_cutoff_empty) THEN
1161 64 : virt_incl = j_virt - 1
1162 64 : EXIT
1163 : END IF
1164 : END DO
1165 64 : virt_red = virt_incl
1166 : END IF
1167 : ELSE
1168 0 : homo_red = homo
1169 0 : virt_red = virtual
1170 0 : homo_incl = 1
1171 0 : virt_incl = virtual
1172 : END IF
1173 :
1174 64 : CALL timestop(handle)
1175 :
1176 64 : END SUBROUTINE
1177 :
1178 : ! **************************************************************************************************
1179 : !> \brief Determines indices within the given energy cutoffs and truncates Eigenvalues and matrices
1180 : !> \param fm_mat_S_ia_bse ...
1181 : !> \param fm_mat_S_ij_bse ...
1182 : !> \param fm_mat_S_ab_bse ...
1183 : !> \param fm_mat_S_trunc ...
1184 : !> \param fm_mat_S_ij_trunc ...
1185 : !> \param fm_mat_S_ab_trunc ...
1186 : !> \param Eigenval_scf ...
1187 : !> \param Eigenval ...
1188 : !> \param Eigenval_reduced ...
1189 : !> \param homo ...
1190 : !> \param virtual ...
1191 : !> \param dimen_RI ...
1192 : !> \param unit_nr ...
1193 : !> \param bse_lev_virt ...
1194 : !> \param homo_red ...
1195 : !> \param virt_red ...
1196 : !> \param mp2_env ...
1197 : ! **************************************************************************************************
1198 160 : SUBROUTINE truncate_BSE_matrices(fm_mat_S_ia_bse, fm_mat_S_ij_bse, fm_mat_S_ab_bse, &
1199 : fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
1200 32 : Eigenval_scf, Eigenval, Eigenval_reduced, &
1201 : homo, virtual, dimen_RI, unit_nr, &
1202 : bse_lev_virt, &
1203 : homo_red, virt_red, &
1204 : mp2_env)
1205 :
1206 : TYPE(cp_fm_type), INTENT(IN) :: fm_mat_S_ia_bse, fm_mat_S_ij_bse, &
1207 : fm_mat_S_ab_bse
1208 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_S_trunc, fm_mat_S_ij_trunc, &
1209 : fm_mat_S_ab_trunc
1210 : REAL(KIND=dp), DIMENSION(:) :: Eigenval_scf, Eigenval
1211 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: Eigenval_reduced
1212 : INTEGER, INTENT(IN) :: homo, virtual, dimen_RI, unit_nr, &
1213 : bse_lev_virt
1214 : INTEGER, INTENT(OUT) :: homo_red, virt_red
1215 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1216 :
1217 : CHARACTER(LEN=*), PARAMETER :: routineN = 'truncate_BSE_matrices'
1218 :
1219 : INTEGER :: handle, homo_incl, virt_incl
1220 : TYPE(cp_blacs_env_type), POINTER :: context
1221 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_ab, fm_struct_ia, fm_struct_ij
1222 : TYPE(mp_para_env_type), POINTER :: para_env
1223 :
1224 32 : CALL timeset(routineN, handle)
1225 :
1226 : ! Determine index in homo and virtual for truncation
1227 : ! Uses indices of outermost orbitals within energy range (-mp2_env%bse%bse_cutoff_occ,mp2_env%bse%bse_cutoff_empty)
1228 :
1229 : CALL determine_cutoff_indices(Eigenval_scf, &
1230 : homo, virtual, &
1231 : homo_red, virt_red, &
1232 : homo_incl, virt_incl, &
1233 32 : mp2_env)
1234 :
1235 32 : IF (unit_nr > 0) THEN
1236 16 : IF (mp2_env%bse%bse_cutoff_occ > 0) THEN
1237 16 : WRITE (unit_nr, '(T2,A4,T7,A29,T71,F10.3)') 'BSE|', 'Cutoff occupied orbitals [eV]', &
1238 32 : mp2_env%bse%bse_cutoff_occ*evolt
1239 : ELSE
1240 0 : WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No cutoff given for occupied orbitals'
1241 : END IF
1242 16 : IF (mp2_env%bse%bse_cutoff_empty > 0) THEN
1243 16 : WRITE (unit_nr, '(T2,A4,T7,A26,T71,F10.3)') 'BSE|', 'Cutoff empty orbitals [eV]', &
1244 32 : mp2_env%bse%bse_cutoff_empty*evolt
1245 : ELSE
1246 0 : WRITE (unit_nr, '(T2,A4,T7,A34)') 'BSE|', 'No cutoff given for empty orbitals'
1247 : END IF
1248 16 : WRITE (unit_nr, '(T2,A4,T7,A20,T71,I10)') 'BSE|', 'First occupied index', homo_incl
1249 16 : WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Last empty index (not MO index!)', virt_incl
1250 16 : WRITE (unit_nr, '(T2,A4,T7,A35,T71,F10.3)') 'BSE|', 'Energy of first occupied index [eV]', Eigenval(homo_incl)*evolt
1251 16 : WRITE (unit_nr, '(T2,A4,T7,A31,T71,F10.3)') 'BSE|', 'Energy of last empty index [eV]', Eigenval(homo + virt_incl)*evolt
1252 16 : WRITE (unit_nr, '(T2,A4,T7,A54,T71,F10.3)') 'BSE|', 'Energy difference of first occupied index to HOMO [eV]', &
1253 32 : -(Eigenval(homo_incl) - Eigenval(homo))*evolt
1254 16 : WRITE (unit_nr, '(T2,A4,T7,A50,T71,F10.3)') 'BSE|', 'Energy difference of last empty index to LUMO [eV]', &
1255 32 : (Eigenval(homo + virt_incl) - Eigenval(homo + 1))*evolt
1256 16 : WRITE (unit_nr, '(T2,A4,T7,A35,T71,I10)') 'BSE|', 'Number of GW-corrected occupied MOs', mp2_env%ri_g0w0%corr_mos_occ
1257 16 : WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Number of GW-corrected empty MOs', mp2_env%ri_g0w0%corr_mos_virt
1258 16 : WRITE (unit_nr, '(T2,A4)') 'BSE|'
1259 : END IF
1260 32 : IF (unit_nr > 0) THEN
1261 16 : IF (homo - homo_incl + 1 > mp2_env%ri_g0w0%corr_mos_occ) THEN
1262 0 : CPABORT("Number of GW-corrected occupied MOs too small for chosen BSE cutoff")
1263 : END IF
1264 16 : IF (virt_incl > mp2_env%ri_g0w0%corr_mos_virt) THEN
1265 0 : CPABORT("Number of GW-corrected virtual MOs too small for chosen BSE cutoff")
1266 : END IF
1267 : END IF
1268 : !Truncate full fm_S matrices
1269 : !Allocate new truncated matrices of proper size
1270 32 : para_env => fm_mat_S_ia_bse%matrix_struct%para_env
1271 32 : context => fm_mat_S_ia_bse%matrix_struct%context
1272 :
1273 32 : CALL cp_fm_struct_create(fm_struct_ia, para_env, context, dimen_RI, homo_red*virt_red)
1274 32 : CALL cp_fm_struct_create(fm_struct_ij, para_env, context, dimen_RI, homo_red*homo_red)
1275 32 : CALL cp_fm_struct_create(fm_struct_ab, para_env, context, dimen_RI, virt_red*virt_red)
1276 :
1277 32 : CALL cp_fm_create(fm_mat_S_trunc, fm_struct_ia, "fm_S_trunc")
1278 32 : CALL cp_fm_create(fm_mat_S_ij_trunc, fm_struct_ij, "fm_S_ij_trunc")
1279 32 : CALL cp_fm_create(fm_mat_S_ab_trunc, fm_struct_ab, "fm_S_ab_trunc")
1280 :
1281 : !Copy parts of original matrices to truncated ones
1282 32 : IF (mp2_env%bse%bse_cutoff_occ > 0 .OR. mp2_env%bse%bse_cutoff_empty > 0) THEN
1283 : !Truncate eigenvals
1284 96 : ALLOCATE (Eigenval_reduced(homo_red + virt_red))
1285 544 : Eigenval_reduced(:) = Eigenval(homo_incl:homo + virt_incl)
1286 :
1287 : CALL truncate_fm(fm_mat_S_trunc, fm_mat_S_ia_bse, virtual, &
1288 : homo_red, virt_red, unit_nr, mp2_env, &
1289 32 : nrow_offset=homo_incl)
1290 : CALL truncate_fm(fm_mat_S_ij_trunc, fm_mat_S_ij_bse, homo, &
1291 : homo_red, homo_red, unit_nr, mp2_env, &
1292 32 : homo_incl, homo_incl)
1293 : CALL truncate_fm(fm_mat_S_ab_trunc, fm_mat_S_ab_bse, bse_lev_virt, &
1294 32 : virt_red, virt_red, unit_nr, mp2_env)
1295 :
1296 : ELSE
1297 0 : IF (unit_nr > 0) THEN
1298 0 : WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No truncation of BSE matrices applied'
1299 0 : WRITE (unit_nr, '(T2,A4)') 'BSE|'
1300 : END IF
1301 0 : ALLOCATE (Eigenval_reduced(homo_red + virt_red))
1302 0 : Eigenval_reduced(:) = Eigenval(:)
1303 : CALL cp_fm_to_fm_submat_general(fm_mat_S_ia_bse, fm_mat_S_trunc, dimen_RI, homo_red*virt_red, &
1304 0 : 1, 1, 1, 1, context)
1305 : CALL cp_fm_to_fm_submat_general(fm_mat_S_ij_bse, fm_mat_S_ij_trunc, dimen_RI, homo_red*homo_red, &
1306 0 : 1, 1, 1, 1, context)
1307 : CALL cp_fm_to_fm_submat_general(fm_mat_S_ab_bse, fm_mat_S_ab_trunc, dimen_RI, virt_red*virt_red, &
1308 0 : 1, 1, 1, 1, context)
1309 : END IF
1310 :
1311 32 : CALL cp_fm_struct_release(fm_struct_ia)
1312 32 : CALL cp_fm_struct_release(fm_struct_ij)
1313 32 : CALL cp_fm_struct_release(fm_struct_ab)
1314 :
1315 32 : NULLIFY (para_env)
1316 32 : NULLIFY (context)
1317 :
1318 32 : CALL timestop(handle)
1319 :
1320 32 : END SUBROUTINE truncate_BSE_matrices
1321 :
1322 : ! **************************************************************************************************
1323 : !> \brief ...
1324 : !> \param fm_eigvec ...
1325 : !> \param fm_eigvec_reshuffled ...
1326 : !> \param homo ...
1327 : !> \param virtual ...
1328 : !> \param n_exc ...
1329 : !> \param do_transpose ...
1330 : !> \param unit_nr ...
1331 : !> \param mp2_env ...
1332 : ! **************************************************************************************************
1333 900 : SUBROUTINE reshuffle_eigvec(fm_eigvec, fm_eigvec_reshuffled, homo, virtual, n_exc, do_transpose, &
1334 : unit_nr, mp2_env)
1335 :
1336 : TYPE(cp_fm_type), INTENT(IN) :: fm_eigvec
1337 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_eigvec_reshuffled
1338 : INTEGER, INTENT(IN) :: homo, virtual, n_exc
1339 : LOGICAL, INTENT(IN) :: do_transpose
1340 : INTEGER, INTENT(IN) :: unit_nr
1341 : TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1342 :
1343 : CHARACTER(LEN=*), PARAMETER :: routineN = 'reshuffle_eigvec'
1344 :
1345 : INTEGER :: handle, my_m_col, my_n_row
1346 : INTEGER, DIMENSION(4) :: reordering
1347 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_eigvec_col, &
1348 : fm_struct_eigvec_reshuffled
1349 : TYPE(cp_fm_type) :: fm_eigvec_col
1350 :
1351 300 : CALL timeset(routineN, handle)
1352 :
1353 : ! Define reordering:
1354 : ! (ia,11) to (a1,i1) for transposition
1355 : ! (ia,11) to (i1,a1) for default
1356 300 : IF (do_transpose) THEN
1357 50 : reordering = (/2, 3, 1, 4/)
1358 50 : my_n_row = virtual
1359 50 : my_m_col = homo
1360 : ELSE
1361 250 : reordering = (/1, 3, 2, 4/)
1362 250 : my_n_row = homo
1363 250 : my_m_col = virtual
1364 : END IF
1365 :
1366 : CALL cp_fm_struct_create(fm_struct_eigvec_col, &
1367 : fm_eigvec%matrix_struct%para_env, fm_eigvec%matrix_struct%context, &
1368 300 : homo*virtual, 1)
1369 : CALL cp_fm_struct_create(fm_struct_eigvec_reshuffled, &
1370 : fm_eigvec%matrix_struct%para_env, fm_eigvec%matrix_struct%context, &
1371 300 : my_n_row, my_m_col)
1372 :
1373 : ! Resort indices
1374 300 : CALL cp_fm_create(fm_eigvec_col, fm_struct_eigvec_col, name="BSE_column_vector")
1375 300 : CALL cp_fm_set_all(fm_eigvec_col, 0.0_dp)
1376 300 : CALL cp_fm_create(fm_eigvec_reshuffled, fm_struct_eigvec_reshuffled, name="BSE_reshuffled_eigenvector")
1377 300 : CALL cp_fm_set_all(fm_eigvec_reshuffled, 0.0_dp)
1378 : ! Fill matrix
1379 : CALL cp_fm_to_fm_submat(fm_eigvec, fm_eigvec_col, &
1380 : homo*virtual, 1, &
1381 : 1, n_exc, &
1382 300 : 1, 1)
1383 : ! Reshuffle
1384 : CALL fm_general_add_bse(fm_eigvec_reshuffled, fm_eigvec_col, 1.0_dp, &
1385 : virtual, 1, &
1386 : 1, 1, &
1387 300 : unit_nr, reordering, mp2_env)
1388 :
1389 300 : CALL cp_fm_release(fm_eigvec_col)
1390 300 : CALL cp_fm_struct_release(fm_struct_eigvec_col)
1391 300 : CALL cp_fm_struct_release(fm_struct_eigvec_reshuffled)
1392 :
1393 300 : CALL timestop(handle)
1394 :
1395 300 : END SUBROUTINE reshuffle_eigvec
1396 :
1397 : ! **************************************************************************************************
1398 : !> \brief Borrowed from the tddfpt module with slight adaptions
1399 : !> \param qs_env ...
1400 : !> \param mos ...
1401 : !> \param istate ...
1402 : !> \param info_approximation ...
1403 : !> \param stride ...
1404 : !> \param append_cube ...
1405 : !> \param print_section ...
1406 : ! **************************************************************************************************
1407 0 : SUBROUTINE print_bse_nto_cubes(qs_env, mos, istate, info_approximation, &
1408 : stride, append_cube, print_section)
1409 :
1410 : TYPE(qs_environment_type), POINTER :: qs_env
1411 : TYPE(mo_set_type), DIMENSION(:), INTENT(IN) :: mos
1412 : INTEGER, INTENT(IN) :: istate
1413 : CHARACTER(LEN=10) :: info_approximation
1414 : INTEGER, DIMENSION(:), POINTER :: stride
1415 : LOGICAL, INTENT(IN) :: append_cube
1416 : TYPE(section_vals_type), POINTER :: print_section
1417 :
1418 : CHARACTER(LEN=*), PARAMETER :: routineN = 'print_bse_nto_cubes'
1419 :
1420 : CHARACTER(LEN=default_path_length) :: filename, info_approx_trunc, &
1421 : my_pos_cube, title
1422 : INTEGER :: handle, i, iset, nmo, unit_nr_cube
1423 : LOGICAL :: mpi_io
1424 0 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1425 : TYPE(cell_type), POINTER :: cell
1426 : TYPE(cp_fm_type), POINTER :: mo_coeff
1427 : TYPE(cp_logger_type), POINTER :: logger
1428 : TYPE(dft_control_type), POINTER :: dft_control
1429 : TYPE(particle_list_type), POINTER :: particles
1430 0 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1431 : TYPE(pw_c1d_gs_type) :: wf_g
1432 : TYPE(pw_env_type), POINTER :: pw_env
1433 0 : TYPE(pw_pool_p_type), DIMENSION(:), POINTER :: pw_pools
1434 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
1435 : TYPE(pw_r3d_rs_type) :: wf_r
1436 0 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1437 : TYPE(qs_subsys_type), POINTER :: subsys
1438 :
1439 0 : logger => cp_get_default_logger()
1440 0 : CALL timeset(routineN, handle)
1441 :
1442 0 : CALL get_qs_env(qs_env=qs_env, dft_control=dft_control, pw_env=pw_env)
1443 0 : CALL pw_env_get(pw_env, auxbas_pw_pool=auxbas_pw_pool, pw_pools=pw_pools)
1444 0 : CALL auxbas_pw_pool%create_pw(wf_r)
1445 0 : CALL auxbas_pw_pool%create_pw(wf_g)
1446 :
1447 0 : CALL get_qs_env(qs_env, subsys=subsys)
1448 0 : CALL qs_subsys_get(subsys, particles=particles)
1449 :
1450 0 : my_pos_cube = "REWIND"
1451 0 : IF (append_cube) THEN
1452 0 : my_pos_cube = "APPEND"
1453 : END IF
1454 :
1455 : CALL get_qs_env(qs_env=qs_env, &
1456 : atomic_kind_set=atomic_kind_set, &
1457 : qs_kind_set=qs_kind_set, &
1458 : cell=cell, &
1459 0 : particle_set=particle_set)
1460 :
1461 0 : DO iset = 1, 2
1462 0 : CALL get_mo_set(mo_set=mos(iset), mo_coeff=mo_coeff, nmo=nmo)
1463 0 : DO i = 1, nmo
1464 : CALL calculate_wavefunction(mo_coeff, i, wf_r, wf_g, atomic_kind_set, qs_kind_set, &
1465 0 : cell, dft_control, particle_set, pw_env)
1466 0 : IF (iset == 1) THEN
1467 0 : WRITE (filename, '(A6,I3.3,A5,I2.2,a11)') "_NEXC_", istate, "_NTO_", i, "_Hole_State"
1468 0 : ELSEIF (iset == 2) THEN
1469 0 : WRITE (filename, '(A6,I3.3,A5,I2.2,a15)') "_NEXC_", istate, "_NTO_", i, "_Particle_State"
1470 : END IF
1471 0 : info_approx_trunc = TRIM(ADJUSTL(info_approximation))
1472 0 : info_approx_trunc = info_approx_trunc(2:LEN_TRIM(info_approx_trunc) - 1)
1473 0 : filename = TRIM(info_approx_trunc)//TRIM(filename)
1474 0 : mpi_io = .TRUE.
1475 : unit_nr_cube = cp_print_key_unit_nr(logger, print_section, '', extension=".cube", &
1476 : middle_name=TRIM(filename), file_position=my_pos_cube, &
1477 0 : log_filename=.FALSE., ignore_should_output=.TRUE., mpi_io=mpi_io)
1478 0 : IF (iset == 1) THEN
1479 0 : WRITE (title, *) "Natural Transition Orbital Hole State", i
1480 0 : ELSEIF (iset == 2) THEN
1481 0 : WRITE (title, *) "Natural Transition Orbital Particle State", i
1482 : END IF
1483 0 : CALL cp_pw_to_cube(wf_r, unit_nr_cube, title, particles=particles, stride=stride, mpi_io=mpi_io)
1484 : CALL cp_print_key_finished_output(unit_nr_cube, logger, print_section, '', &
1485 0 : ignore_should_output=.TRUE., mpi_io=mpi_io)
1486 : END DO
1487 : END DO
1488 :
1489 0 : CALL auxbas_pw_pool%give_back_pw(wf_g)
1490 0 : CALL auxbas_pw_pool%give_back_pw(wf_r)
1491 :
1492 0 : CALL timestop(handle)
1493 0 : END SUBROUTINE print_bse_nto_cubes
1494 :
1495 : ! **************************************************************************************************
1496 : !> \brief Borrowed from the tddfpt module with slight adaptions
1497 : !> \param homo ...
1498 : !> \param virtual ...
1499 : !> \param unit_nr ...
1500 : !> \param mp2_env ...
1501 : ! **************************************************************************************************
1502 32 : SUBROUTINE adapt_BSE_input_params(homo, virtual, unit_nr, mp2_env)
1503 :
1504 : INTEGER, INTENT(IN) :: homo, virtual, unit_nr
1505 : TYPE(mp2_type) :: mp2_env
1506 :
1507 : CHARACTER(LEN=*), PARAMETER :: routineN = 'adapt_BSE_input_params'
1508 :
1509 : INTEGER :: handle, i, j, n, &
1510 : num_state_list_exceptions
1511 :
1512 32 : CALL timeset(routineN, handle)
1513 :
1514 : ! Handle negative NUM_PRINT_EXC
1515 32 : IF (mp2_env%bse%num_print_exc < 0 .OR. &
1516 : mp2_env%bse%num_print_exc > homo*virtual) THEN
1517 0 : mp2_env%bse%num_print_exc = homo*virtual
1518 0 : IF (unit_nr > 0) THEN
1519 : CALL cp_warn(__LOCATION__, &
1520 : "Keyword NUM_PRINT_EXC is either negative or too large. "// &
1521 0 : "Printing all computed excitations.")
1522 : END IF
1523 : END IF
1524 :
1525 : ! Default to NUM_PRINT_EXC if too large or negative,
1526 : ! but only if NTOs are called - would be confusing for the user otherwise
1527 : ! Prepare and adapt user inputs for NTO analysis
1528 : ! Logic: Explicit state list overrides NUM_PRINT_EXC_NTOS
1529 : ! If only NUM_PRINT_EXC_NTOS is given, we write the array 1,...,NUM_PRINT_EXC_NTOS to
1530 : ! bse_nto_state_list
1531 32 : IF (mp2_env%bse%do_nto_analysis) THEN
1532 4 : IF (mp2_env%bse%explicit_nto_list) THEN
1533 0 : IF (mp2_env%bse%num_print_exc_ntos > 0) THEN
1534 0 : IF (unit_nr > 0) THEN
1535 : CALL cp_warn(__LOCATION__, &
1536 : "Keywords NUM_PRINT_EXC_NTOS and STATE_LIST are both given in input. "// &
1537 0 : "Overriding NUM_PRINT_EXC_NTOS.")
1538 : END IF
1539 : END IF
1540 : ! Check if all states are within the range
1541 : ! Count them and initialize new array afterwards
1542 0 : num_state_list_exceptions = 0
1543 0 : DO i = 1, SIZE(mp2_env%bse%bse_nto_state_list)
1544 0 : IF (mp2_env%bse%bse_nto_state_list(i) < 1 .OR. &
1545 0 : mp2_env%bse%bse_nto_state_list(i) > mp2_env%bse%num_print_exc) THEN
1546 0 : num_state_list_exceptions = num_state_list_exceptions + 1
1547 : END IF
1548 : END DO
1549 0 : IF (num_state_list_exceptions > 0) THEN
1550 0 : IF (unit_nr > 0) THEN
1551 : CALL cp_warn(__LOCATION__, &
1552 : "STATE_LIST contains indices outside the range of included excitation levels. "// &
1553 0 : "Ignoring these states.")
1554 : END IF
1555 : END IF
1556 0 : n = SIZE(mp2_env%bse%bse_nto_state_list) - num_state_list_exceptions
1557 0 : ALLOCATE (mp2_env%bse%bse_nto_state_list_final(n))
1558 0 : mp2_env%bse%bse_nto_state_list_final(:) = 0
1559 : i = 1
1560 0 : DO j = 1, SIZE(mp2_env%bse%bse_nto_state_list)
1561 0 : IF (mp2_env%bse%bse_nto_state_list(j) >= 1 .AND. &
1562 0 : mp2_env%bse%bse_nto_state_list(j) <= mp2_env%bse%num_print_exc) THEN
1563 0 : mp2_env%bse%bse_nto_state_list_final(i) = mp2_env%bse%bse_nto_state_list(j)
1564 0 : i = i + 1
1565 : END IF
1566 : END DO
1567 :
1568 0 : mp2_env%bse%num_print_exc_ntos = SIZE(mp2_env%bse%bse_nto_state_list_final)
1569 : ELSE
1570 4 : IF (mp2_env%bse%num_print_exc_ntos > mp2_env%bse%num_print_exc .OR. &
1571 : mp2_env%bse%num_print_exc_ntos < 0) THEN
1572 4 : mp2_env%bse%num_print_exc_ntos = mp2_env%bse%num_print_exc
1573 : END IF
1574 12 : ALLOCATE (mp2_env%bse%bse_nto_state_list_final(mp2_env%bse%num_print_exc_ntos))
1575 104 : DO i = 1, mp2_env%bse%num_print_exc_ntos
1576 104 : mp2_env%bse%bse_nto_state_list_final(i) = i
1577 : END DO
1578 : END IF
1579 : END IF
1580 :
1581 : ! Takes care of triplet states, when oscillator strengths are 0
1582 32 : IF (mp2_env%bse%bse_spin_config /= 0 .AND. &
1583 : mp2_env%bse%eps_nto_osc_str > 0) THEN
1584 0 : IF (unit_nr > 0) THEN
1585 : CALL cp_warn(__LOCATION__, &
1586 : "Cannot apply EPS_OSC_STR for Triplet excitations. "// &
1587 0 : "Resetting EPS_OSC_STR to default.")
1588 : END IF
1589 0 : mp2_env%bse%eps_nto_osc_str = -1.0_dp
1590 : END IF
1591 :
1592 : ! Take care of number for computed exciton descriptors
1593 32 : IF (mp2_env%bse%num_print_exc_descr < 0 .OR. &
1594 : mp2_env%bse%num_print_exc_descr > mp2_env%bse%num_print_exc) THEN
1595 4 : IF (unit_nr > 0) THEN
1596 : CALL cp_warn(__LOCATION__, &
1597 : "Keyword NUM_PRINT_EXC_DESCR is either negative or too large. "// &
1598 2 : "Printing exciton descriptors up to NUM_PRINT_EXC.")
1599 : END IF
1600 4 : mp2_env%bse%num_print_exc_descr = mp2_env%bse%num_print_exc
1601 : END IF
1602 :
1603 32 : CALL timestop(handle)
1604 32 : END SUBROUTINE adapt_BSE_input_params
1605 :
1606 : ! **************************************************************************************************
1607 :
1608 : ! **************************************************************************************************
1609 : !> \brief ...
1610 : !> \param fm_multipole_ai_trunc ...
1611 : !> \param fm_multipole_ij_trunc ...
1612 : !> \param fm_multipole_ab_trunc ...
1613 : !> \param qs_env ...
1614 : !> \param mo_coeff ...
1615 : !> \param rpoint ...
1616 : !> \param n_moments ...
1617 : !> \param homo_red ...
1618 : !> \param virtual_red ...
1619 : !> \param context_BSE ...
1620 : ! **************************************************************************************************
1621 36 : SUBROUTINE get_multipoles_mo(fm_multipole_ai_trunc, fm_multipole_ij_trunc, fm_multipole_ab_trunc, &
1622 36 : qs_env, mo_coeff, rpoint, n_moments, &
1623 : homo_red, virtual_red, context_BSE)
1624 :
1625 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
1626 : INTENT(INOUT) :: fm_multipole_ai_trunc, &
1627 : fm_multipole_ij_trunc, &
1628 : fm_multipole_ab_trunc
1629 : TYPE(qs_environment_type), POINTER :: qs_env
1630 : TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1631 : REAL(dp), ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: rpoint
1632 : INTEGER, INTENT(IN) :: n_moments, homo_red, virtual_red
1633 : TYPE(cp_blacs_env_type), POINTER :: context_BSE
1634 :
1635 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_multipoles_mo'
1636 :
1637 : INTEGER :: handle, idir, n_multipole, n_occ, &
1638 : n_virt, nao
1639 36 : REAL(KIND=dp), DIMENSION(:), POINTER :: ref_point
1640 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_mp_ab_trunc, fm_struct_mp_ai_trunc, &
1641 : fm_struct_mp_ij_trunc, fm_struct_multipoles_ao, fm_struct_nao_nao
1642 : TYPE(cp_fm_type) :: fm_work
1643 36 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_multipole_per_dir
1644 36 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_multipole, matrix_s
1645 36 : TYPE(mo_set_type), DIMENSION(:), POINTER :: mos
1646 : TYPE(mp_para_env_type), POINTER :: para_env_BSE
1647 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1648 36 : POINTER :: sab_orb
1649 :
1650 36 : CALL timeset(routineN, handle)
1651 :
1652 : !First, we calculate the AO dipoles
1653 36 : NULLIFY (sab_orb, matrix_s)
1654 : CALL get_qs_env(qs_env, &
1655 : mos=mos, &
1656 : matrix_s=matrix_s, &
1657 36 : sab_orb=sab_orb)
1658 :
1659 : ! Use the same blacs environment as for the MO coefficients to ensure correct multiplication dbcsr x fm later on
1660 36 : fm_struct_multipoles_ao => mos(1)%mo_coeff%matrix_struct
1661 : ! BSE has different contexts and blacsenvs
1662 36 : para_env_BSE => context_BSE%para_env
1663 : ! Get size of multipole tensor
1664 36 : n_multipole = (6 + 11*n_moments + 6*n_moments**2 + n_moments**3)/6 - 1
1665 36 : NULLIFY (matrix_multipole)
1666 36 : CALL dbcsr_allocate_matrix_set(matrix_multipole, n_multipole)
1667 240 : ALLOCATE (fm_multipole_per_dir(n_multipole))
1668 168 : DO idir = 1, n_multipole
1669 132 : CALL dbcsr_init_p(matrix_multipole(idir)%matrix)
1670 : CALL dbcsr_create(matrix_multipole(idir)%matrix, name="ao_multipole", &
1671 132 : template=matrix_s(1)%matrix, matrix_type=dbcsr_type_symmetric)
1672 132 : CALL cp_dbcsr_alloc_block_from_nbl(matrix_multipole(idir)%matrix, sab_orb)
1673 168 : CALL dbcsr_set(matrix_multipole(idir)%matrix, 0._dp)
1674 : END DO
1675 :
1676 36 : CALL get_reference_point(rpoint, qs_env=qs_env, reference=use_mom_ref_coac, ref_point=ref_point)
1677 :
1678 36 : CALL build_local_moment_matrix(qs_env, matrix_multipole, n_moments, ref_point=rpoint)
1679 :
1680 36 : NULLIFY (sab_orb)
1681 :
1682 : ! Now we transform them to MO
1683 : ! n_occ is the number of occupied MOs, nao the number of all AOs
1684 : ! Writing homo to n_occ instead if nmo,
1685 : ! takes care of ADDED_MOS, which would overwrite nmo, if invoked
1686 36 : CALL get_mo_set(mo_set=mos(1), homo=n_occ, nao=nao)
1687 36 : n_virt = nao - n_occ
1688 :
1689 : ! At the end, we need four different layouts of matrices in this multiplication, e.g. for a dipole:
1690 : ! D_pq = full multipole matrix for occupied and unoccupied
1691 : ! Final result:D_pq= C_{mu p} <\mu|\vec{r}|\nu> C_{\nu q} EQ.I
1692 : ! \_______/ \___________/ \______/
1693 : ! fm_coeff matrix_multipole fm_coeff
1694 : ! (EQ.Ia) (EQ.Ib) (EQ.Ia)
1695 : ! Intermediate work matrices:
1696 : ! fm_work = <\mu|\vec{r}|\nu> C_{\nu q} EQ.II
1697 :
1698 : ! Struct for the full multipole matrix
1699 : CALL cp_fm_struct_create(fm_struct_nao_nao, &
1700 : fm_struct_multipoles_ao%para_env, fm_struct_multipoles_ao%context, &
1701 36 : nao, nao)
1702 :
1703 : ! At the very end, we copy the multipoles corresponding to truncated BSE indices in i and a
1704 : CALL cp_fm_struct_create(fm_struct_mp_ai_trunc, para_env_BSE, &
1705 36 : context_BSE, virtual_red, homo_red)
1706 : CALL cp_fm_struct_create(fm_struct_mp_ij_trunc, para_env_BSE, &
1707 36 : context_BSE, homo_red, homo_red)
1708 : CALL cp_fm_struct_create(fm_struct_mp_ab_trunc, para_env_BSE, &
1709 36 : context_BSE, virtual_red, virtual_red)
1710 168 : DO idir = 1, n_multipole
1711 : CALL cp_fm_create(fm_multipole_ai_trunc(idir), matrix_struct=fm_struct_mp_ai_trunc, &
1712 132 : name="dipoles_mo_ai_trunc")
1713 132 : CALL cp_fm_set_all(fm_multipole_ai_trunc(idir), 0.0_dp)
1714 : CALL cp_fm_create(fm_multipole_ij_trunc(idir), matrix_struct=fm_struct_mp_ij_trunc, &
1715 132 : name="dipoles_mo_ij_trunc")
1716 132 : CALL cp_fm_set_all(fm_multipole_ij_trunc(idir), 0.0_dp)
1717 : CALL cp_fm_create(fm_multipole_ab_trunc(idir), matrix_struct=fm_struct_mp_ab_trunc, &
1718 132 : name="dipoles_mo_ab_trunc")
1719 168 : CALL cp_fm_set_all(fm_multipole_ab_trunc(idir), 0.0_dp)
1720 : END DO
1721 :
1722 : ! Need another temporary matrix to store intermediate result from right multiplication
1723 : ! D = C_{mu a} <\mu|\vec{r}|\nu> C_{\nu i}
1724 36 : CALL cp_fm_create(fm_work, matrix_struct=fm_struct_nao_nao, name="multipole_work")
1725 36 : CALL cp_fm_set_all(fm_work, 0.0_dp)
1726 :
1727 168 : DO idir = 1, n_multipole
1728 : ! Create the full multipole matrix per direction
1729 132 : CALL cp_fm_create(fm_multipole_per_dir(idir), matrix_struct=fm_struct_nao_nao, name="multipoles_mo")
1730 132 : CALL cp_fm_set_all(fm_multipole_per_dir(idir), 0.0_dp)
1731 : ! Fill final (MO) multipole matrix
1732 : CALL cp_dbcsr_sm_fm_multiply(matrix_multipole(idir)%matrix, mo_coeff(1), &
1733 132 : fm_work, ncol=nao)
1734 : ! Now obtain the multipoles by the final multiplication;
1735 : ! We do that inside the loop to obtain multipoles per axis for print
1736 132 : CALL parallel_gemm('T', 'N', nao, nao, nao, 1.0_dp, mo_coeff(1), fm_work, 0.0_dp, fm_multipole_per_dir(idir))
1737 : ! Truncate full matrix to the BSE indices
1738 : ! D_ai
1739 : CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
1740 : fm_multipole_ai_trunc(idir), &
1741 : virtual_red, &
1742 : homo_red, &
1743 : n_occ + 1, &
1744 : n_occ - homo_red + 1, &
1745 : 1, &
1746 : 1, &
1747 132 : fm_multipole_per_dir(idir)%matrix_struct%context)
1748 : ! D_ij
1749 : CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
1750 : fm_multipole_ij_trunc(idir), &
1751 : homo_red, &
1752 : homo_red, &
1753 : n_occ - homo_red + 1, &
1754 : n_occ - homo_red + 1, &
1755 : 1, &
1756 : 1, &
1757 132 : fm_multipole_per_dir(idir)%matrix_struct%context)
1758 : ! D_ab
1759 : CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
1760 : fm_multipole_ab_trunc(idir), &
1761 : virtual_red, &
1762 : virtual_red, &
1763 : n_occ + 1, &
1764 : n_occ + 1, &
1765 : 1, &
1766 : 1, &
1767 168 : fm_multipole_per_dir(idir)%matrix_struct%context)
1768 : END DO
1769 :
1770 : !Release matrices and structs
1771 36 : NULLIFY (fm_struct_multipoles_ao)
1772 36 : CALL cp_fm_struct_release(fm_struct_mp_ai_trunc)
1773 36 : CALL cp_fm_struct_release(fm_struct_mp_ij_trunc)
1774 36 : CALL cp_fm_struct_release(fm_struct_mp_ab_trunc)
1775 36 : CALL cp_fm_struct_release(fm_struct_nao_nao)
1776 168 : DO idir = 1, n_multipole
1777 168 : CALL cp_fm_release(fm_multipole_per_dir(idir))
1778 : END DO
1779 36 : DEALLOCATE (fm_multipole_per_dir)
1780 36 : CALL cp_fm_release(fm_work)
1781 36 : CALL dbcsr_deallocate_matrix_set(matrix_multipole)
1782 :
1783 36 : CALL timestop(handle)
1784 :
1785 108 : END SUBROUTINE get_multipoles_mo
1786 :
1787 : ! **************************************************************************************************
1788 : !> \brief Computes trace of form Tr{A^T B C} for exciton descriptors
1789 : !> \param fm_A Full Matrix, typically X or Y, in format homo x virtual
1790 : !> \param fm_B ...
1791 : !> \param fm_C ...
1792 : !> \param alpha ...
1793 : ! **************************************************************************************************
1794 10800 : SUBROUTINE trace_exciton_descr(fm_A, fm_B, fm_C, alpha)
1795 :
1796 : TYPE(cp_fm_type), INTENT(IN) :: fm_A, fm_B, fm_C
1797 : REAL(KIND=dp), INTENT(OUT) :: alpha
1798 :
1799 : CHARACTER(LEN=*), PARAMETER :: routineN = 'trace_exciton_descr'
1800 :
1801 : INTEGER :: handle, ncol_A, ncol_B, ncol_C, nrow_A, &
1802 : nrow_B, nrow_C
1803 : TYPE(cp_fm_type) :: fm_work_ia
1804 :
1805 1800 : CALL timeset(routineN, handle)
1806 :
1807 1800 : CALL cp_fm_create(fm_work_ia, fm_A%matrix_struct)
1808 1800 : CALL cp_fm_get_info(fm_A, nrow_global=nrow_A, ncol_global=ncol_A)
1809 1800 : CALL cp_fm_get_info(fm_B, nrow_global=nrow_B, ncol_global=ncol_B)
1810 1800 : CALL cp_fm_get_info(fm_C, nrow_global=nrow_C, ncol_global=ncol_C)
1811 :
1812 : ! Check matrix sizes
1813 1800 : CPASSERT(nrow_A == nrow_B .AND. ncol_A == ncol_C .AND. ncol_B == nrow_C)
1814 :
1815 1800 : CALL cp_fm_set_all(fm_work_ia, 0.0_dp)
1816 :
1817 : CALL parallel_gemm("N", "N", nrow_A, ncol_A, nrow_C, 1.0_dp, &
1818 1800 : fm_B, fm_C, 0.0_dp, fm_work_ia)
1819 :
1820 1800 : CALL cp_fm_trace(fm_A, fm_work_ia, alpha)
1821 :
1822 1800 : CALL cp_fm_release(fm_work_ia)
1823 :
1824 1800 : CALL timestop(handle)
1825 :
1826 1800 : END SUBROUTINE trace_exciton_descr
1827 :
1828 : END MODULE
|