Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief GW using RI-RS Approximation
10 : !> \par History
11 : !> 04.2026 created [Ritaj Tyagi]
12 : ! **************************************************************************************************
13 : MODULE gw_large_cell_Gamma_ri_rs
14 : USE atomic_kind_types, ONLY: atomic_kind_type
15 : USE basis_set_types, ONLY: gto_basis_set_type
16 : USE cell_types, ONLY: cell_type,&
17 : pbc
18 : USE cp_dbcsr_api, ONLY: &
19 : dbcsr_add, dbcsr_binary_read, dbcsr_binary_write, dbcsr_copy, dbcsr_create, &
20 : dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, &
21 : dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
22 : dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
23 : dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, dbcsr_p_type, dbcsr_put_block, &
24 : dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_type, dbcsr_type_no_symmetry
25 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
26 : copy_fm_to_dbcsr,&
27 : dbcsr_deallocate_matrix_set
28 : USE cp_files, ONLY: close_file,&
29 : open_file
30 : USE cp_fm_types, ONLY: cp_fm_create,&
31 : cp_fm_release,&
32 : cp_fm_type
33 : USE cp_log_handling, ONLY: cp_get_default_logger,&
34 : cp_logger_type
35 : USE cp_output_handling, ONLY: cp_p_file,&
36 : cp_print_key_should_output
37 : USE gw_integrals, ONLY: build_3c_integral_block
38 : USE gw_large_cell_gamma, ONLY: G_occ_vir,&
39 : compute_QP_energies,&
40 : delete_unnecessary_files,&
41 : fill_fm_Sigma_c_Gamma_time,&
42 : get_W_MIC,&
43 : multiply_fm_W_MIC_time_with_Minv_Gamma
44 : USE gw_utils, ONLY: de_init_bs_env
45 : USE input_section_types, ONLY: section_vals_type
46 : USE kinds, ONLY: dp
47 : USE machine, ONLY: m_walltime
48 : USE message_passing, ONLY: mp_para_env_type
49 : USE mp2_ri_2c, ONLY: RI_2c_integral_mat
50 : USE orbital_pointers, ONLY: indco,&
51 : ncoset
52 : USE particle_types, ONLY: particle_type
53 : USE post_scf_bandstructure_types, ONLY: post_scf_bandstructure_type
54 : USE qs_environment_types, ONLY: get_qs_env,&
55 : qs_environment_type
56 : USE qs_kind_types, ONLY: get_qs_kind,&
57 : qs_kind_type
58 : #include "./base/base_uses.f90"
59 :
60 : IMPLICIT NONE
61 :
62 : PRIVATE
63 :
64 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'gw_large_cell_Gamma_ri_rs'
65 :
66 : PUBLIC :: gw_calc_large_cell_Gamma_ri_rs
67 :
68 : CONTAINS
69 :
70 : ! **************************************************************************************************
71 : !> \brief GW calculation using RI-RS formalism
72 : !> \param qs_env ...
73 : !> \param bs_env ...
74 : ! **************************************************************************************************
75 :
76 10 : SUBROUTINE gw_calc_large_cell_Gamma_ri_rs(qs_env, bs_env)
77 :
78 : TYPE(qs_environment_type), POINTER :: qs_env
79 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
80 :
81 : CHARACTER(LEN=*), PARAMETER :: routineN = 'gw_calc_large_cell_Gamma_ri_rs'
82 :
83 : INTEGER :: handle
84 10 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_Sigma_x_Gamma, fm_W_MIC_time
85 10 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: fm_Sigma_c_Gamma_time
86 :
87 10 : CALL timeset(routineN, handle)
88 :
89 : !!========================================================================
90 : !! 1. Grid Generation for RI-RS
91 : !! (Modified Lebedev grids from Ivan Duchemin and Xavier Blase)
92 : !! Generate flattened 1D array of grid points for RI-RS.
93 : !! Equation: r_g(k) = R_A + r_g(A)
94 : !!========================================================================
95 10 : CALL ri_rs_grid_assembler(qs_env, bs_env, bs_env%ri_rs%grid_points)
96 :
97 : !!========================================================================
98 : !! 2. Atomic Basis Evaluation
99 : !! Compute values of spherical atomic basis functions at grid points.
100 : !! Expression: Φ_μl = Φ_μ(r_l) (mat_phi_mu_l)
101 : !!========================================================================
102 : CALL atomic_basis_at_grid_point(qs_env, bs_env, bs_env%ri_rs%grid_points, &
103 10 : bs_env%ri_rs%mat_phi_mu_l)
104 :
105 : !!========================================================================
106 : !! 3. Compute RI-RS Coefficients (Z_lp)
107 : !! Solve the regularized system for each atom P, where the grid domain
108 : !! is restricted to r_l within a cutoff distance of atom P:
109 : !! a. D_ll' = [ Σ_μ Φ_μ(r_l) Φ_μ(r_l') ]^2 (Equation 13)
110 : !! b. D_lP = Σ_{μν} Φ_μ(r_l) Φ_ν(r_l) (μν|P) (Equation 15)
111 : !! c. Conditioning:
112 : !! Dvec_l = 1 / sqrt(D_ll) (Diagonal scaling vector)
113 : !! D'_ll' = Dvec_l * D_ll' * Dvec_l' + λδ_ll'
114 : !! D'_lP = Dvec_l * D_lP
115 : !! d. Solve: Σ_l' D'_ll' * Z'_l'P = D'_lP (Equation 14)
116 : !! e. Rescale: Z_lP = Z'_lP * Dvec_l (Z_lP stored in mat_Z_lP)
117 : !!========================================================================
118 : CALL compute_coeff_Z_lP(qs_env, bs_env, bs_env%ri_rs%grid_points, &
119 10 : bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
120 :
121 : !!========================================================================
122 : !! 4. Compute Independent-Particle Polarizability (χ)
123 : !! G^occ_µλ(i|τ|,k=0) = sum_n^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
124 : !! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
125 : !! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
126 : !! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
127 : !! χ_ll'(iτ,k=0) = G^occ_ll'(i|τ|,k=0) * G^vir_ll'(i|τ|,k=0)
128 : !! χ_PQ(iτ,k=0) = sum_ll' Z_lP χ_ll'(iτ,k=0) Z_l'Q
129 : !!========================================================================
130 : CALL get_mat_chi_Gamma_tau(bs_env, bs_env%mat_chi_Gamma_tau, &
131 10 : bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
132 :
133 : !!========================================================================
134 : !! 5. Compute Screened Interaction (W^MIC)
135 : !! χ_PQ(iτ,k=0) -> χ_PQ(iω,k) -> ε_PQ(iω,k) -> W_PQ(iω,k) -> W^MIC_PQ(iτ)
136 : !!========================================================================
137 10 : CALL get_W_MIC(bs_env, qs_env, bs_env%mat_chi_Gamma_tau, fm_W_MIC_time)
138 :
139 : !!========================================================================
140 : !! 6. Compute Exact Exchange Self-Energy (Σ^x)
141 : !! D_µν = sum_n^occ C_µn(k=0) C_νn(k=0)
142 : !! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
143 : !! V^trunc_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
144 : !! Σ^x_ll' = D_ll' * V^trunc_ll'
145 : !! Σ^x_λσ(k=0) = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
146 : !!========================================================================
147 : CALL compute_Sigma_x(bs_env, qs_env, bs_env%ri_rs%mat_phi_mu_l, &
148 10 : bs_env%ri_rs%mat_Z_lP, fm_Sigma_x_Gamma)
149 :
150 : !!========================================================================
151 : !! 7. Compute Correlation Self-Energy (Σ^c)
152 : !! W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
153 : !! Σ^c_ll'(iτ,k=0) = -G^occ_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ < 0
154 : !! Σ^c_ll'(iτ,k=0) = G^vir_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ > 0
155 : !! Σ^c_λσ(iτ,k=0) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l')
156 : !!========================================================================
157 : CALL compute_Sigma_c(bs_env, fm_W_MIC_time, bs_env%ri_rs%mat_phi_mu_l, &
158 10 : bs_env%ri_rs%mat_Z_lP, fm_Sigma_c_Gamma_time)
159 :
160 : !!========================================================================
161 : !! 8. Compute Quasiparticle Energies
162 : !! Σ^c_λσ(iτ,k=0) -> Σ^c_nn(ϵ,k)
163 : !! ϵ_nk^GW = ϵ_nk^DFT + Σ^c_nn(ϵ,k) + Σ^x_nn(k) - v^xc_nn(k)
164 : !!========================================================================
165 10 : CALL compute_QP_energies(bs_env, qs_env, fm_Sigma_x_Gamma, fm_Sigma_c_Gamma_time)
166 :
167 10 : CALL de_init_bs_env(bs_env)
168 :
169 10 : CALL timestop(handle)
170 :
171 10 : END SUBROUTINE gw_calc_large_cell_Gamma_ri_rs
172 :
173 : ! **************************************************************************************************
174 : !> \brief Compute grid points for RI-RS
175 : !> Right now based on Ivan and Xavier implementation
176 : !> JCP 150, 174120 (2019), JCTC 17, 2383 (2021)
177 : !> \param qs_env ...
178 : !> \param bs_env ...
179 : !> \param ri_rs_grid_points ...
180 : ! **************************************************************************************************
181 :
182 10 : SUBROUTINE ri_rs_grid_assembler(qs_env, bs_env, ri_rs_grid_points)
183 :
184 : TYPE(qs_environment_type), POINTER :: qs_env
185 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
186 : REAL(KIND=dp), ALLOCATABLE, INTENT(OUT) :: ri_rs_grid_points(:, :)
187 :
188 : CHARACTER(LEN=*), PARAMETER :: routineN = 'ri_rs_grid_assembler'
189 :
190 : INTEGER :: atom_idx, end_idx, handle, ikind, j, &
191 : natom, nkind, start_idx, &
192 : total_grid_npts
193 10 : INTEGER, ALLOCATABLE :: atom_to_kind(:), ri_rs_grid_offsets(:)
194 : REAL(KIND=dp) :: atomic_center(3)
195 10 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
196 10 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
197 :
198 10 : CALL timeset(routineN, handle)
199 :
200 : !! Get the information about the atoms in the system
201 10 : CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, particle_set=particle_set)
202 :
203 10 : nkind = SIZE(atomic_kind_set)
204 10 : natom = SIZE(particle_set)
205 :
206 : !! 1. Build the grid cache
207 10 : CALL build_grid_cache(bs_env, atomic_kind_set)
208 :
209 : !! 2. Calculate grid counts and offsets
210 30 : ALLOCATE (ri_rs_grid_offsets(natom + 1))
211 30 : ALLOCATE (atom_to_kind(natom))
212 :
213 10 : total_grid_npts = 0
214 28 : DO ikind = 1, nkind
215 56 : DO j = 1, SIZE(atomic_kind_set(ikind)%atom_list)
216 28 : atom_idx = atomic_kind_set(ikind)%atom_list(j)
217 28 : atom_to_kind(atom_idx) = ikind
218 28 : ri_rs_grid_offsets(atom_idx) = total_grid_npts + 1
219 46 : total_grid_npts = total_grid_npts + bs_env%ri_rs%grid_cache(ikind)%npts
220 : END DO
221 : END DO
222 :
223 10 : ri_rs_grid_offsets(natom + 1) = total_grid_npts + 1
224 :
225 10 : IF (bs_env%unit_nr > 0) THEN
226 : WRITE (bs_env%unit_nr, FMT="(T2,A,T69,I12)") &
227 5 : 'Total grid points used for RI-RS:', total_grid_npts
228 5 : WRITE (bs_env%unit_nr, "(A)") ' '
229 : END IF
230 :
231 : !! 3. Allocate the global ri_rs_grid arrays
232 30 : ALLOCATE (ri_rs_grid_points(3, total_grid_npts))
233 :
234 : !! 4. Parallelize the grid generation loop
235 : !$OMP PARALLEL DO DEFAULT(NONE) &
236 : !$OMP SHARED(ri_rs_grid_points, ri_rs_grid_offsets, atom_to_kind, &
237 : !$OMP particle_set, bs_env, natom) &
238 : !$OMP PRIVATE(atom_idx, ikind, atomic_center, start_idx, end_idx) &
239 10 : !$OMP SCHEDULE(DYNAMIC, 1)
240 : DO atom_idx = 1, natom
241 : ikind = atom_to_kind(atom_idx)
242 : atomic_center(:) = particle_set(atom_idx)%r(:)
243 :
244 : start_idx = ri_rs_grid_offsets(atom_idx)
245 : ! offsets are filled in kind-grouped order, so offsets(atom_idx+1) belongs to an
246 : ! unrelated atom when elements are interleaved in the geometry; span from own kind
247 : end_idx = start_idx + bs_env%ri_rs%grid_cache(ikind)%npts - 1
248 :
249 : !! Shift the cached origin grid by the atom's center
250 : ri_rs_grid_points(1, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(1, :) + atomic_center(1)
251 : ri_rs_grid_points(2, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(2, :) + atomic_center(2)
252 : ri_rs_grid_points(3, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(3, :) + atomic_center(3)
253 :
254 : END DO
255 : !$OMP END PARALLEL DO
256 :
257 : !! 5. Cleanup memory
258 10 : IF (ALLOCATED(bs_env%ri_rs%grid_cache)) THEN
259 28 : DO ikind = 1, nkind
260 28 : IF (ALLOCATED(bs_env%ri_rs%grid_cache(ikind)%raw_points)) DEALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points)
261 : END DO
262 28 : DEALLOCATE (bs_env%ri_rs%grid_cache)
263 : END IF
264 :
265 10 : DEALLOCATE (atom_to_kind, ri_rs_grid_offsets)
266 10 : CALL timestop(handle)
267 :
268 20 : END SUBROUTINE ri_rs_grid_assembler
269 :
270 : ! **************************************************************************************************
271 : !> \brief Reads grids from .ion files and stores them in memory based on grid_select
272 : !> \param bs_env ...
273 : !> \param atomic_kind_set ...
274 : ! **************************************************************************************************
275 :
276 10 : SUBROUTINE build_grid_cache(bs_env, atomic_kind_set)
277 :
278 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
279 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
280 :
281 : CHARACTER(LEN=256) :: atom_sym, filename, full_path, line, &
282 : suffix
283 : INTEGER :: colon_idx, i, ierr, ikind, iunit, nkind
284 : REAL(KIND=dp) :: pt(3)
285 :
286 : ! Determine the file suffix based on the user's choice
287 10 : IF (bs_env%ri_rs%grid_select == 1) THEN
288 10 : suffix = "_def2-tzvp-rs.ion"
289 0 : ELSE IF (bs_env%ri_rs%grid_select == 2) THEN
290 0 : suffix = "_cc-pvtz-rs.ion"
291 : ELSE
292 0 : CPABORT("Unknown grid_select value. Valid options are 1 (def2-TZVPP) or 2 (cc-pVTZ).")
293 : END IF
294 :
295 10 : nkind = SIZE(atomic_kind_set)
296 48 : IF (.NOT. ALLOCATED(bs_env%ri_rs%grid_cache)) ALLOCATE (bs_env%ri_rs%grid_cache(nkind))
297 :
298 28 : DO ikind = 1, nkind
299 18 : atom_sym = TRIM(atomic_kind_set(ikind)%element_symbol)
300 18 : filename = TRIM(atom_sym)//TRIM(suffix)
301 :
302 18 : full_path = "ri_rs_grid/"//TRIM(filename)
303 :
304 : CALL open_file(file_name=TRIM(full_path), unit_number=iunit, &
305 18 : file_action="READ", file_status="OLD")
306 :
307 : ! Parse the preamble for 'n points'
308 18 : bs_env%ri_rs%grid_cache(ikind)%npts = 0
309 : DO
310 612 : READ (iunit, '(A)', IOSTAT=ierr) line
311 612 : IF (ierr /= 0) EXIT
312 612 : IF (INDEX(line, 'n points') > 0) THEN
313 18 : colon_idx = INDEX(line, ':')
314 18 : READ (line(colon_idx + 1:), *) bs_env%ri_rs%grid_cache(ikind)%npts
315 18 : EXIT
316 : END IF
317 : END DO
318 :
319 : ! Allocate the cache array for this specific element
320 54 : ALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points(3, bs_env%ri_rs%grid_cache(ikind)%npts))
321 :
322 : ! Fast-forward to the <grid_points> tag
323 18 : REWIND (iunit)
324 : DO
325 648 : READ (iunit, '(A)', IOSTAT=ierr) line
326 648 : IF (ierr /= 0) EXIT
327 648 : IF (INDEX(line, '<grid_points>') > 0) EXIT
328 : END DO
329 :
330 : ! Read the raw grid coordinates
331 4466 : DO i = 1, bs_env%ri_rs%grid_cache(ikind)%npts
332 4448 : READ (iunit, *, IOSTAT=ierr) pt(1), pt(2), pt(3)
333 4448 : IF (ierr /= 0) THEN
334 0 : CPABORT("Unexpected EOF in grid file ")
335 : END IF
336 17810 : bs_env%ri_rs%grid_cache(ikind)%raw_points(:, i) = pt(:)
337 : END DO
338 :
339 28 : CALL close_file(unit_number=iunit)
340 : END DO
341 :
342 10 : END SUBROUTINE build_grid_cache
343 :
344 : ! **************************************************************************************************
345 : !> \brief Evaluates atomic basis functions on a real-space grid and builds a sparse DBCSR matrix.
346 : !> \param qs_env ...
347 : !> \param bs_env ...
348 : !> \param ri_rs_grid_points ...
349 : !> \param mat_phi_mu_l ...
350 : ! **************************************************************************************************
351 :
352 10 : SUBROUTINE atomic_basis_at_grid_point(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l)
353 :
354 : TYPE(qs_environment_type), POINTER :: qs_env
355 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
356 : REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT) :: ri_rs_grid_points(:, :)
357 : TYPE(dbcsr_type), INTENT(OUT) :: mat_phi_mu_l
358 :
359 : CHARACTER(LEN=*), PARAMETER :: routineN = 'atomic_basis_at_grid_point'
360 :
361 : INTEGER :: c_size, chunk_size, dimen_ORB, handle, i, i_blk, iatom, natom, npcol, nprow, &
362 : num_grid_chunks, r_end, r_start, total_grid_npts
363 : INTEGER, ALLOCATABLE, DIMENSION(:) :: first_sgf
364 10 : INTEGER, DIMENSION(:), POINTER :: c_blk_sizes, col_dist, col_dist_ks, &
365 10 : r_blk_sizes, row_dist, row_dist_ks
366 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: atom_col_buffer
367 10 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
368 : TYPE(cell_type), POINTER :: cell
369 : TYPE(dbcsr_distribution_type) :: dist
370 : TYPE(dbcsr_distribution_type), POINTER :: dbcsr_dist_ks
371 : TYPE(mp_para_env_type), POINTER :: para_env
372 10 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
373 10 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
374 :
375 10 : CALL timeset(routineN, handle)
376 :
377 : ! Setup Grid Blocking
378 : ! Right now using 256
379 10 : chunk_size = bs_env%ri_rs%chunk_size_dbcsr
380 :
381 : ! Extract environment variables
382 : CALL get_qs_env(qs_env, cell=cell, atomic_kind_set=atomic_kind_set, &
383 : qs_kind_set=qs_kind_set, particle_set=particle_set, &
384 10 : para_env=para_env)
385 :
386 10 : natom = SIZE(particle_set)
387 10 : total_grid_npts = SIZE(ri_rs_grid_points, 2)
388 :
389 : ! Map the starting indices of spherical gaussian functions (SGF) for each atom
390 30 : ALLOCATE (first_sgf(natom + 1))
391 10 : CALL get_basis_offsets(particle_set, qs_kind_set, first_sgf, dimen_ORB)
392 :
393 : ! =========================================================================
394 : ! 1. SETUP DBCSR MATRIX TOPOLOGY
395 : ! =========================================================================
396 :
397 : ! A. Define Column Block Sizes (1 Block = 1 Atom's full basis set)
398 30 : ALLOCATE (c_blk_sizes(natom))
399 38 : DO iatom = 1, natom
400 38 : c_blk_sizes(iatom) = first_sgf(iatom + 1) - first_sgf(iatom)
401 : END DO
402 :
403 : ! B. Define Row Block Sizes (Grid chunks of max size 256)
404 10 : num_grid_chunks = CEILING(REAL(total_grid_npts, KIND=dp)/REAL(chunk_size, KIND=dp))
405 30 : ALLOCATE (r_blk_sizes(num_grid_chunks))
406 40 : r_blk_sizes = chunk_size
407 10 : IF (MOD(total_grid_npts, chunk_size) /= 0) THEN
408 10 : r_blk_sizes(num_grid_chunks) = MOD(total_grid_npts, chunk_size)
409 : END IF
410 :
411 : ! C. Fetch CP2K's Default Process Grid Configuration
412 10 : CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist_ks)
413 10 : CALL dbcsr_distribution_get(dbcsr_dist_ks, row_dist=row_dist_ks, col_dist=col_dist_ks)
414 :
415 : ! D. Build Custom Mappings using Round-Robin across the 2D process grid
416 : ! (MAXVAL + 1 accounts for 0-based process indexing in DBCSR)
417 38 : nprow = MAXVAL(row_dist_ks) + 1
418 38 : npcol = MAXVAL(col_dist_ks) + 1
419 :
420 20 : ALLOCATE (row_dist(num_grid_chunks))
421 40 : DO i = 1, num_grid_chunks
422 40 : row_dist(i) = MOD(i - 1, nprow)
423 : END DO
424 :
425 20 : ALLOCATE (col_dist(natom))
426 38 : DO i = 1, natom
427 38 : col_dist(i) = MOD(i - 1, npcol)
428 : END DO
429 :
430 : ! E. Create the DBCSR Distribution and Initialize the Matrix
431 : CALL dbcsr_distribution_new(dist, template=dbcsr_dist_ks, &
432 10 : row_dist=row_dist, col_dist=col_dist)
433 :
434 : CALL dbcsr_create(mat_phi_mu_l, name="phi_val_sparse", dist=dist, &
435 : matrix_type=dbcsr_type_no_symmetry, &
436 10 : row_blk_size=r_blk_sizes, col_blk_size=c_blk_sizes)
437 :
438 : ! =========================================================================
439 : ! 2. STREAM DATA DIRECTLY INTO SPARSE MATRIX
440 : ! =========================================================================
441 : ! Iterate over the atoms assigned to this specific MPI rank
442 10 : DO iatom = para_env%mepos + 1, natom, para_env%num_pe
443 :
444 14 : c_size = c_blk_sizes(iatom)
445 :
446 : ! Allocate a temporary dense buffer just for this specific atom
447 56 : ALLOCATE (atom_col_buffer(total_grid_npts, c_size))
448 14 : atom_col_buffer = 0.0_dp
449 :
450 : ! Evaluate the basis functions on the grid
451 : CALL fill_phi_for_atom(atom_col_buffer, ri_rs_grid_points, total_grid_npts, &
452 14 : iatom, particle_set, qs_kind_set, cell)
453 :
454 : ! Slice the dense column into chunks and insert into DBCSR
455 56 : DO i_blk = 1, num_grid_chunks
456 42 : r_start = (i_blk - 1)*chunk_size + 1
457 42 : r_end = MIN(i_blk*chunk_size, total_grid_npts)
458 :
459 : ! Apply dynamic sparsity filtering: Only store blocks with physical significance
460 23914 : IF (MAXVAL(ABS(atom_col_buffer(r_start:r_end, 1:c_size))) > bs_env%eps_filter) THEN
461 : CALL dbcsr_put_block(mat_phi_mu_l, row=i_blk, col=iatom, &
462 42 : block=atom_col_buffer(r_start:r_end, 1:c_size))
463 : END IF
464 : END DO
465 :
466 14 : DEALLOCATE (atom_col_buffer)
467 :
468 : END DO
469 :
470 : ! Finalize triggers internal MPI communication to route blocks to their correct 2D process owners
471 10 : CALL dbcsr_finalize(mat_phi_mu_l)
472 :
473 : ! -------------------------------------------------------------------------
474 : ! CLEANUP
475 : ! -------------------------------------------------------------------------
476 10 : DEALLOCATE (first_sgf, r_blk_sizes, c_blk_sizes, row_dist, col_dist)
477 10 : CALL dbcsr_distribution_release(dist)
478 :
479 10 : CALL timestop(handle)
480 :
481 40 : END SUBROUTINE atomic_basis_at_grid_point
482 :
483 : ! **************************************************************************************************
484 : !> \brief Compute value of all basis function for single atom across all grid points
485 : !> \param phi_val ...
486 : !> \param ri_rs_grid ...
487 : !> \param npts ...
488 : !> \param iatom ...
489 : !> \param particle_set ...
490 : !> \param qs_kind_set ...
491 : !> \param cell ...
492 : ! **************************************************************************************************
493 :
494 14 : SUBROUTINE fill_phi_for_atom(phi_val, ri_rs_grid, npts, iatom, &
495 : particle_set, qs_kind_set, cell)
496 :
497 : REAL(KIND=dp), INTENT(INOUT) :: phi_val(:, :)
498 : INTEGER, INTENT(IN) :: npts
499 : REAL(KIND=dp), INTENT(IN) :: ri_rs_grid(3, npts)
500 : INTEGER, INTENT(IN) :: iatom
501 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
502 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
503 : TYPE(cell_type), POINTER :: cell
504 :
505 : INTEGER :: first_sgf, i_pt, ico, iend_co, ikind, &
506 : ipgf, iset, isgf, ishell, istart_co, &
507 : l, last_sgf, lx, ly, lz, n_cart_total, &
508 : row_idx
509 : REAL(KIND=dp) :: alpha, dist_vec(3), exp_val, poly, r2, &
510 : r_atom(3), weight
511 : TYPE(gto_basis_set_type), POINTER :: orb_basis_set
512 :
513 : ! Get Atom Info
514 14 : ikind = particle_set(iatom)%atomic_kind%kind_number
515 14 : CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type="ORB")
516 14 : IF (.NOT. ASSOCIATED(orb_basis_set)) RETURN
517 :
518 56 : r_atom = particle_set(iatom)%r
519 :
520 : !$OMP PARALLEL DO DEFAULT(NONE) &
521 : !$OMP SHARED(phi_val, ri_rs_grid, npts, orb_basis_set, r_atom, cell, ncoset, indco) &
522 : !$OMP PRIVATE(i_pt, dist_vec, r2, iset, n_cart_total, ishell, l, istart_co, iend_co, &
523 : !$OMP first_sgf, last_sgf, ipgf, alpha, exp_val, isgf, ico, row_idx, weight, &
524 : !$OMP lx, ly, lz, poly) &
525 14 : !$OMP SCHEDULE(STATIC)
526 : DO i_pt = 1, npts
527 : dist_vec = pbc(ri_rs_grid(:, i_pt) - r_atom, cell)
528 : r2 = DOT_PRODUCT(dist_vec, dist_vec)
529 :
530 : DO iset = 1, orb_basis_set%nset
531 : n_cart_total = ncoset(orb_basis_set%lmax(iset))
532 :
533 : DO ishell = 1, orb_basis_set%nshell(iset)
534 : l = orb_basis_set%l(ishell, iset)
535 : istart_co = ncoset(l - 1) + 1
536 : iend_co = ncoset(l)
537 :
538 : first_sgf = orb_basis_set%first_sgf(ishell, iset)
539 : last_sgf = orb_basis_set%last_sgf(ishell, iset)
540 :
541 : DO ipgf = 1, orb_basis_set%npgf(iset)
542 : alpha = orb_basis_set%zet(ipgf, iset)
543 : exp_val = EXP(-alpha*r2)
544 :
545 : DO isgf = first_sgf, last_sgf
546 : DO ico = istart_co, iend_co
547 : row_idx = (ipgf - 1)*n_cart_total + ico
548 : weight = orb_basis_set%sphi(row_idx, isgf)
549 : lx = indco(1, ico)
550 : ly = indco(2, ico)
551 : lz = indco(3, ico)
552 : poly = (dist_vec(1)**lx)*(dist_vec(2)**ly)*(dist_vec(3)**lz)
553 :
554 : phi_val(i_pt, isgf) = phi_val(i_pt, isgf) + (weight*poly*exp_val)
555 :
556 : END DO
557 : END DO
558 : END DO
559 : END DO
560 : END DO
561 : END DO
562 : !$OMP END PARALLEL DO
563 :
564 : END SUBROUTINE fill_phi_for_atom
565 :
566 : ! **************************************************************************************************
567 : !> \brief Helper for OMP threads to fill phi_val column values
568 : !> \param particle_set ...
569 : !> \param qs_kind_set ...
570 : !> \param first_sgf ...
571 : !> \param total_sgf ...
572 : ! **************************************************************************************************
573 :
574 10 : SUBROUTINE get_basis_offsets(particle_set, qs_kind_set, first_sgf, total_sgf)
575 :
576 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
577 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
578 : INTEGER, INTENT(OUT) :: first_sgf(:), total_sgf
579 :
580 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_basis_offsets'
581 :
582 : INTEGER :: handle, iatom, ikind, nsgf
583 :
584 10 : CALL timeset(routineN, handle)
585 :
586 10 : total_sgf = 0
587 38 : DO iatom = 1, SIZE(particle_set)
588 28 : first_sgf(iatom) = total_sgf + 1
589 28 : ikind = particle_set(iatom)%atomic_kind%kind_number
590 28 : CALL get_qs_kind(qs_kind_set(ikind), nsgf=nsgf, basis_type="ORB")
591 38 : total_sgf = total_sgf + nsgf
592 : END DO
593 10 : first_sgf(SIZE(particle_set) + 1) = total_sgf + 1
594 :
595 10 : CALL timestop(handle)
596 :
597 10 : END SUBROUTINE get_basis_offsets
598 :
599 : ! **************************************************************************************************
600 : !> \brief Compute RI-RS Coefficients (Z_lP)
601 : !> \param qs_env ...
602 : !> \param bs_env ...
603 : !> \param ri_rs_grid_points ...
604 : !> \param mat_phi_mu_l ...
605 : !> \param mat_Z_lP ...
606 : ! **************************************************************************************************
607 :
608 10 : SUBROUTINE compute_coeff_Z_lP(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l, mat_Z_lP)
609 :
610 : ! Arguments
611 : TYPE(qs_environment_type), POINTER :: qs_env
612 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
613 : REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT) :: ri_rs_grid_points(:, :)
614 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l
615 : TYPE(dbcsr_type), INTENT(OUT) :: mat_Z_lP
616 :
617 : CHARACTER(LEN=*), PARAMETER :: key = 'PROPERTIES%BANDSTRUCTURE%GW%PRINT%RESTART', &
618 : routineN = 'compute_coeff_Z_lP'
619 :
620 : INTEGER :: atom_P, chunk_size, col, current_chunk_size, global_i, global_j, group_handle, &
621 : handle, i, i_blk, info, istart_ao, j, j_ri, l, loc_idx, max_ao_size, max_loc_ri, &
622 : n_ao_total, n_grid_total, n_loc_ri, n_local_grid, natom, num_grid_chunks, r_end, r_start, &
623 : row
624 10 : INTEGER, ALLOCATABLE, DIMENSION(:) :: local_grid_idx, map_global_to_local, &
625 10 : row_offset
626 10 : INTEGER, DIMENSION(:), POINTER :: col_dist_phi, col_dist_ri, r_blk_sizes, &
627 10 : ri_blk_sizes, row_dist_grid
628 : REAL(KIND=dp) :: cutoff_ri, dist, t1
629 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: d_vec_local
630 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: D_local, d_lp_local, int_2d_local, &
631 10 : phi_global, phi_local, rho_pair_local, &
632 10 : Z_blk, Z_global_temp
633 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: int_3c_local
634 : REAL(KIND=dp), DIMENSION(3) :: pos_P
635 10 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: block_ptr
636 : TYPE(cp_logger_type), POINTER :: logger
637 : TYPE(dbcsr_distribution_type) :: dist_phi, dist_Z
638 : TYPE(dbcsr_iterator_type) :: iter
639 : TYPE(mp_para_env_type), POINTER :: para_env
640 10 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
641 : TYPE(section_vals_type), POINTER :: input
642 :
643 10 : CALL timeset(routineN, handle)
644 :
645 : ! Restrict integration domain to a localized sphere around the atom
646 10 : cutoff_ri = bs_env%ri_rs%cutoff_radius_ri_rs
647 :
648 10 : t1 = m_walltime()
649 :
650 10 : CALL get_qs_env(qs_env, para_env=para_env, particle_set=particle_set, input=input)
651 :
652 10 : natom = SIZE(bs_env%i_RI_start_from_atom)
653 10 : n_ao_total = bs_env%i_ao_end_from_atom(natom)
654 10 : n_grid_total = SIZE(ri_rs_grid_points, 2)
655 :
656 : ! =========================================================================
657 : ! 1. SETUP DBCSR TOPOLOGY & EXACT OFFSETS
658 : ! =========================================================================
659 10 : CALL dbcsr_get_info(mat_phi_mu_l, row_blk_size=r_blk_sizes, distribution=dist_phi)
660 10 : CALL dbcsr_distribution_get(dist_phi, row_dist=row_dist_grid, col_dist=col_dist_phi, group=group_handle)
661 :
662 10 : chunk_size = r_blk_sizes(1)
663 10 : num_grid_chunks = SIZE(r_blk_sizes)
664 :
665 30 : ALLOCATE (row_offset(num_grid_chunks))
666 10 : row_offset(1) = 0
667 30 : DO i_blk = 2, num_grid_chunks
668 30 : row_offset(i_blk) = row_offset(i_blk - 1) + r_blk_sizes(i_blk - 1)
669 : END DO
670 :
671 40 : ALLOCATE (ri_blk_sizes(natom), col_dist_ri(natom))
672 38 : DO atom_P = 1, natom
673 28 : ri_blk_sizes(atom_P) = bs_env%i_RI_end_from_atom(atom_P) - bs_env%i_RI_start_from_atom(atom_P) + 1
674 118 : col_dist_ri(atom_P) = MOD(atom_P - 1, MAXVAL(col_dist_phi) + 1)
675 : END DO
676 :
677 10 : CALL dbcsr_distribution_new(dist_Z, template=dist_phi, row_dist=row_dist_grid, col_dist=col_dist_ri)
678 :
679 10 : IF (bs_env%ri_rs%Z_lP_exists) THEN
680 : CALL dbcsr_binary_read(filepath=TRIM(bs_env%prefix)//"Z_lP.matrix", &
681 : distribution=dist_Z, &
682 2 : matrix_new=mat_Z_lP)
683 2 : IF (bs_env%unit_nr > 0) THEN
684 : WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
685 1 : 'Read Z_lP from file ', ' Execution time', m_walltime() - t1, ' s'
686 1 : WRITE (bs_env%unit_nr, '(A)') ' '
687 : END IF
688 : ELSE
689 :
690 : CALL dbcsr_create(mat_Z_lP, name="mat_Z_lP", dist=dist_Z, &
691 : matrix_type=dbcsr_type_no_symmetry, &
692 8 : row_blk_size=r_blk_sizes, col_blk_size=ri_blk_sizes)
693 :
694 8 : max_ao_size = 0
695 30 : DO j = 1, SIZE(bs_env%i_ao_start_from_atom)
696 30 : max_ao_size = MAX(max_ao_size, bs_env%i_ao_end_from_atom(j) - bs_env%i_ao_start_from_atom(j) + 1)
697 : END DO
698 30 : max_loc_ri = MAXVAL(ri_blk_sizes)
699 :
700 : ! Global mapping array: 4 bytes * N_grid_total (~4MB per million grid points)
701 24 : ALLOCATE (map_global_to_local(n_grid_total))
702 :
703 : ! =========================================================================
704 : ! 2. PRE-COMPUTE: EXTRACT FULL DBCSR TO DENSE GLOBAL ARRAY
705 : ! =========================================================================
706 32 : ALLOCATE (phi_global(n_grid_total, n_ao_total))
707 8 : phi_global = 0.0_dp
708 :
709 8 : CALL dbcsr_iterator_start(iter, mat_phi_mu_l)
710 41 : DO WHILE (dbcsr_iterator_blocks_left(iter))
711 33 : CALL dbcsr_iterator_next_block(iter, row, col, block_ptr)
712 :
713 33 : IF (col < 1 .OR. col > natom) CYCLE
714 33 : istart_ao = bs_env%i_ao_start_from_atom(col)
715 :
716 134 : DO j = 1, SIZE(block_ptr, 2)
717 93 : global_j = istart_ao + j - 1
718 93 : IF (global_j < 1 .OR. global_j > n_ao_total) CYCLE
719 :
720 19614 : DO i = 1, SIZE(block_ptr, 1)
721 19488 : global_i = row_offset(row) + i
722 19488 : IF (global_i < 1 .OR. global_i > n_grid_total) CYCLE
723 :
724 : ! Insert directly into the global dense array
725 19581 : phi_global(global_i, global_j) = block_ptr(i, j)
726 : END DO
727 : END DO
728 : END DO
729 8 : CALL dbcsr_iterator_stop(iter)
730 :
731 8 : CALL para_env%sum(phi_global)
732 :
733 : ! =========================================================================
734 : ! 3. MPI LOOP OVER ATOMS (Fully independent, no MPI barriers inside)
735 : ! This distributes the N_atoms among the available MPI processors.
736 : ! =========================================================================
737 8 : DO atom_P = para_env%mepos + 1, natom, para_env%num_pe
738 :
739 11 : n_loc_ri = ri_blk_sizes(atom_P)
740 44 : pos_P(:) = particle_set(atom_P)%r(:)
741 :
742 : ! ---------------------------------------------------------------------
743 : ! A. Determine Local Grid Domain based on cutoff_ri
744 : ! ---------------------------------------------------------------------
745 11 : n_local_grid = 0
746 6827 : DO l = 1, n_grid_total
747 27264 : dist = SQRT(SUM((ri_rs_grid_points(1:3, l) - pos_P(1:3))**2))
748 6827 : IF (dist <= cutoff_ri) n_local_grid = n_local_grid + 1
749 : END DO
750 :
751 33 : ALLOCATE (local_grid_idx(n_local_grid))
752 11 : map_global_to_local = 0
753 :
754 11 : n_local_grid = 0
755 6827 : DO l = 1, n_grid_total
756 27264 : dist = SQRT(SUM((ri_rs_grid_points(1:3, l) - pos_P(1:3))**2))
757 6827 : IF (dist <= cutoff_ri) THEN
758 6816 : n_local_grid = n_local_grid + 1
759 6816 : local_grid_idx(n_local_grid) = l
760 6816 : map_global_to_local(l) = n_local_grid
761 : END IF
762 : END DO
763 :
764 : ! ---------------------------------------------------------------------
765 : ! B. Smart Fetch of Local Phi (MPI-Safe & Bounds-Checked)
766 : ! ---------------------------------------------------------------------
767 44 : ALLOCATE (phi_local(n_local_grid, n_ao_total))
768 :
769 : !$OMP PARALLEL DO DEFAULT(NONE) &
770 : !$OMP SHARED(n_ao_total, n_local_grid, local_grid_idx, phi_local, phi_global) &
771 : !$OMP PRIVATE(global_j, loc_idx, global_i) &
772 11 : !$OMP SCHEDULE(STATIC)
773 : DO global_j = 1, n_ao_total
774 : DO loc_idx = 1, n_local_grid
775 : global_i = local_grid_idx(loc_idx)
776 : phi_local(loc_idx, global_j) = phi_global(global_i, global_j)
777 : END DO
778 : END DO
779 : !$OMP END PARALLEL DO
780 :
781 : ! ---------------------------------------------------------------------
782 : ! C. Build and Balance Local LHS Matrix (D_local)
783 : ! ---------------------------------------------------------------------
784 66 : ALLOCATE (D_local(n_local_grid, n_local_grid), d_vec_local(n_local_grid))
785 11 : D_local = 0.0_dp
786 :
787 11 : CALL dsyrk("L", "N", n_local_grid, n_ao_total, 1.0_dp, phi_local, n_local_grid, 0.0_dp, D_local, n_local_grid)
788 :
789 : !$OMP PARALLEL DO DEFAULT(NONE) &
790 : !$OMP SHARED(n_local_grid, D_local, d_vec_local, bs_env) &
791 : !$OMP PRIVATE(i) &
792 11 : !$OMP SCHEDULE(STATIC)
793 : DO i = 1, n_local_grid
794 : D_local(i, i) = D_local(i, i)**2
795 : d_vec_local(i) = 1.0_dp/SQRT(MAX(D_local(i, i), 1.0E-16_dp))
796 : D_local(i, i) = (D_local(i, i)*d_vec_local(i)**2) + bs_env%ri_rs%tikhonov
797 : END DO
798 : !$OMP END PARALLEL DO
799 :
800 : !$OMP PARALLEL DO DEFAULT(NONE) &
801 : !$OMP SHARED(n_local_grid, D_local, d_vec_local) &
802 : !$OMP PRIVATE(j, i) &
803 11 : !$OMP SCHEDULE(DYNAMIC)
804 : DO j = 1, n_local_grid
805 : DO i = j + 1, n_local_grid
806 : D_local(i, j) = D_local(i, j)**2
807 : D_local(i, j) = D_local(i, j)*d_vec_local(i)*d_vec_local(j)
808 : D_local(j, i) = D_local(i, j)
809 : END DO
810 : END DO
811 : !$OMP END PARALLEL DO
812 :
813 : ! ---------------------------------------------------------------------
814 : ! D. Build Local RHS Matrix (d_lp_local)
815 : ! ---------------------------------------------------------------------
816 44 : ALLOCATE (d_lp_local(n_local_grid, n_loc_ri))
817 44 : ALLOCATE (rho_pair_local(n_local_grid, max_ao_size*max_ao_size))
818 55 : ALLOCATE (int_3c_local(max_ao_size, max_ao_size, n_loc_ri))
819 44 : ALLOCATE (int_2d_local(max_ao_size*max_ao_size, n_loc_ri))
820 :
821 11 : d_lp_local = 0.0_dp
822 :
823 : CALL compute_d_lp(qs_env, bs_env, phi_local, d_lp_local, n_local_grid, &
824 11 : n_loc_ri, atom_P, rho_pair_local, int_3c_local, int_2d_local, max_ao_size)
825 :
826 : !$OMP PARALLEL DO DEFAULT(NONE) &
827 : !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
828 : !$OMP PRIVATE(j_ri, i) &
829 11 : !$OMP SCHEDULE(STATIC)
830 : DO j_ri = 1, n_loc_ri
831 : DO i = 1, n_local_grid
832 : d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
833 : END DO
834 : END DO
835 : !$OMP END PARALLEL DO
836 :
837 : ! ---------------------------------------------------------------------
838 : ! E. Fast Local LAPACK Solve
839 : ! ---------------------------------------------------------------------
840 :
841 11 : CALL dpotrf('L', n_local_grid, D_local, n_local_grid, info)
842 :
843 11 : CALL dpotrs('L', n_local_grid, n_loc_ri, D_local, n_local_grid, d_lp_local, n_local_grid, info)
844 :
845 : !$OMP PARALLEL DO DEFAULT(NONE) &
846 : !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
847 : !$OMP PRIVATE(j_ri, i) &
848 11 : !$OMP SCHEDULE(STATIC)
849 : DO j_ri = 1, n_loc_ri
850 : DO i = 1, n_local_grid
851 : d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
852 : END DO
853 : END DO
854 : !$OMP END PARALLEL DO
855 :
856 : ! ---------------------------------------------------------------------
857 : ! F. Scatter Local Solution Back to Global DBCSR Matrix
858 : ! ---------------------------------------------------------------------
859 44 : ALLOCATE (Z_global_temp(n_grid_total, n_loc_ri))
860 11 : Z_global_temp = 0.0_dp
861 :
862 : !$OMP PARALLEL DO DEFAULT(NONE) &
863 : !$OMP SHARED(n_local_grid, local_grid_idx, Z_global_temp, n_loc_ri, d_lp_local) &
864 : !$OMP PRIVATE(loc_idx, global_i) &
865 11 : !$OMP SCHEDULE(STATIC)
866 : DO loc_idx = 1, n_local_grid
867 : global_i = local_grid_idx(loc_idx)
868 : Z_global_temp(global_i, 1:n_loc_ri) = d_lp_local(loc_idx, 1:n_loc_ri)
869 : END DO
870 : !$OMP END PARALLEL DO
871 :
872 77 : ALLOCATE (Z_blk(MAXVAL(r_blk_sizes), n_loc_ri))
873 :
874 44 : DO i_blk = 1, num_grid_chunks
875 33 : r_start = row_offset(i_blk) + 1
876 33 : r_end = row_offset(i_blk) + r_blk_sizes(i_blk)
877 33 : current_chunk_size = r_blk_sizes(i_blk)
878 :
879 33 : Z_blk = 0.0_dp
880 53376 : Z_blk(1:current_chunk_size, 1:n_loc_ri) = Z_global_temp(r_start:r_end, 1:n_loc_ri)
881 :
882 53387 : IF (MAXVAL(ABS(Z_blk(1:current_chunk_size, 1:n_loc_ri))) > bs_env%eps_filter) THEN
883 : CALL dbcsr_put_block(mat_Z_lP, row=i_blk, col=atom_P, &
884 33 : block=Z_blk(1:current_chunk_size, 1:n_loc_ri))
885 : END IF
886 : END DO
887 :
888 11 : DEALLOCATE (Z_global_temp, Z_blk)
889 11 : DEALLOCATE (D_local, d_vec_local, d_lp_local)
890 11 : DEALLOCATE (rho_pair_local, int_3c_local, int_2d_local)
891 :
892 11 : DEALLOCATE (local_grid_idx, phi_local)
893 :
894 : END DO
895 :
896 : ! Clean up the massive global array once the loop finishes
897 8 : DEALLOCATE (phi_global)
898 :
899 8 : CALL dbcsr_finalize(mat_Z_lP)
900 :
901 8 : IF (bs_env%unit_nr > 0) THEN
902 : WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
903 4 : 'Computed Z_lP ', ' Execution time', m_walltime() - t1, ' s'
904 4 : WRITE (bs_env%unit_nr, '(A)') ' '
905 : END IF
906 :
907 8 : logger => cp_get_default_logger()
908 :
909 8 : IF (BTEST(cp_print_key_should_output(logger%iter_info, input, key), cp_p_file)) THEN
910 0 : CALL dbcsr_binary_write(matrix=mat_Z_lP, filepath=TRIM(bs_env%prefix)//"Z_lP.matrix")
911 : END IF
912 :
913 16 : DEALLOCATE (map_global_to_local)
914 :
915 : END IF
916 :
917 10 : DEALLOCATE (row_offset, ri_blk_sizes, col_dist_ri)
918 10 : CALL dbcsr_distribution_release(dist_Z)
919 :
920 10 : DEALLOCATE (ri_rs_grid_points)
921 :
922 10 : CALL timestop(handle)
923 :
924 40 : END SUBROUTINE compute_coeff_Z_lP
925 :
926 : ! **************************************************************************************************
927 : !> \brief Computes the dense localized Right-Hand Side (RHS) matrix d_lp
928 : !> \param qs_env ...
929 : !> \param bs_env ...
930 : !> \param phi_val ...
931 : !> \param d_lp ...
932 : !> \param n_grid_total ...
933 : !> \param n_loc_ri ...
934 : !> \param atom_P ...
935 : !> \param rho_pair ...
936 : !> \param int_3c ...
937 : !> \param int_2d ...
938 : !> \param max_ao_size ...
939 : ! **************************************************************************************************
940 :
941 44 : SUBROUTINE compute_d_lp(qs_env, bs_env, phi_val, d_lp, n_grid_total, n_loc_ri, atom_P, &
942 11 : rho_pair, int_3c, int_2d, max_ao_size)
943 :
944 : TYPE(qs_environment_type), POINTER :: qs_env
945 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
946 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: phi_val
947 : REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT) :: d_lp
948 : INTEGER, INTENT(IN) :: n_grid_total, n_loc_ri, atom_P
949 : REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT) :: rho_pair
950 : REAL(KIND=dp), DIMENSION(:, :, :), INTENT(INOUT) :: int_3c
951 : REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT) :: int_2d
952 : INTEGER, INTENT(IN) :: max_ao_size
953 :
954 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_d_lp'
955 :
956 : INTEGER :: atom_j, atom_k, handle, j, jk_idx, &
957 : jsize, jstart, k, ksize, kstart, l, ri
958 :
959 11 : CALL timeset(routineN, handle)
960 :
961 42 : DO atom_j = 1, SIZE(bs_env%i_ao_start_from_atom)
962 31 : jstart = bs_env%i_ao_start_from_atom(atom_j)
963 31 : jsize = bs_env%i_ao_end_from_atom(atom_j) - jstart + 1
964 :
965 131 : DO atom_k = 1, SIZE(bs_env%i_ao_start_from_atom)
966 89 : kstart = bs_env%i_ao_start_from_atom(atom_k)
967 89 : ksize = bs_env%i_ao_end_from_atom(atom_k) - kstart + 1
968 :
969 7794 : int_3c(1:jsize, 1:ksize, 1:n_loc_ri) = 0.0_dp
970 :
971 : ! Compute B_{μν,P} = (μν|P)
972 : CALL build_3c_integral_block(int_3c(1:jsize, 1:ksize, 1:n_loc_ri), &
973 : qs_env, bs_env%ri_metric, &
974 : basis_j=bs_env%basis_set_AO, &
975 : basis_k=bs_env%basis_set_AO, &
976 : basis_i=bs_env%basis_set_RI, &
977 89 : atom_k=atom_k, atom_j=atom_j, atom_i=atom_P)
978 :
979 : ! Build Pair Density: ρ(l, μν) = Φ_μ(r_l) \times Φ_ν(r_l)
980 : !$OMP PARALLEL DO DEFAULT(NONE) &
981 : !$OMP SHARED(ksize, jsize, n_grid_total, rho_pair, phi_val, jstart, kstart) &
982 : !$OMP PRIVATE(k, j, l, jk_idx) &
983 89 : !$OMP COLLAPSE(2)
984 : DO k = 1, ksize
985 : DO j = 1, jsize
986 : jk_idx = (k - 1)*jsize + j
987 : DO l = 1, n_grid_total
988 : rho_pair(l, jk_idx) = phi_val(l, jstart + j - 1)*phi_val(l, kstart + k - 1)
989 : END DO
990 : END DO
991 : END DO
992 : !$OMP END PARALLEL DO
993 :
994 : ! Flatten 3D B_{μν, P} tensor to 2D B_{(μν), P} matrix for BLAS
995 : !$OMP PARALLEL DO DEFAULT(NONE) &
996 : !$OMP SHARED(n_loc_ri, ksize, jsize, int_2d, int_3c) &
997 : !$OMP PRIVATE(ri, k, j, jk_idx) &
998 89 : !$OMP COLLAPSE(2)
999 : DO ri = 1, n_loc_ri
1000 : DO k = 1, ksize
1001 : DO j = 1, jsize
1002 : jk_idx = (k - 1)*jsize + j
1003 : int_2d(jk_idx, ri) = int_3c(j, k, ri)
1004 : END DO
1005 : END DO
1006 : END DO
1007 : !$OMP END PARALLEL DO
1008 :
1009 : ! Perform Contraction: d_{l, P} += ρ(l, μν) \times B_{(μν), P}
1010 : CALL dgemm("N", "N", n_grid_total, n_loc_ri, jsize*ksize, &
1011 : 1.0_dp, rho_pair, n_grid_total, &
1012 : int_2d, max_ao_size*max_ao_size, &
1013 120 : 1.0_dp, d_lp(1, 1), n_grid_total)
1014 : END DO
1015 : END DO
1016 :
1017 11 : CALL timestop(handle)
1018 :
1019 11 : END SUBROUTINE compute_d_lp
1020 :
1021 : ! **************************************************************************************************
1022 : !> \brief Computes the χ(iτ, k=0) matrix
1023 : !> \param bs_env ...
1024 : !> \param mat_chi_Gamma_tau ...
1025 : !> \param mat_phi_mu_l ...
1026 : !> \param mat_Z_lP ...
1027 : ! **************************************************************************************************
1028 :
1029 10 : SUBROUTINE get_mat_chi_Gamma_tau(bs_env, mat_chi_Gamma_tau, mat_phi_mu_l, mat_Z_lP)
1030 :
1031 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1032 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_chi_Gamma_tau
1033 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_Z_lP
1034 :
1035 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_mat_chi_Gamma_tau'
1036 :
1037 : INTEGER :: handle, i, i_t, ispin, npcol
1038 10 : INTEGER, DIMENSION(:), POINTER :: blk_ao, blk_grid, dist_col_ao, &
1039 10 : dist_col_grid, dist_row_grid
1040 : REAL(KIND=dp) :: t1, tau
1041 : TYPE(dbcsr_distribution_type) :: dist_grid_grid, dist_phi
1042 : TYPE(dbcsr_type) :: matrix_chi_grid, matrix_chi_grid_spin, &
1043 : matrix_G_occ_grid, matrix_G_vir_grid
1044 :
1045 10 : CALL timeset(routineN, handle)
1046 :
1047 : ! =========================================================================
1048 : ! 1. SETUP CORE TOPOLOGIES
1049 : ! =========================================================================
1050 10 : CALL dbcsr_get_info(mat_phi_mu_l, distribution=dist_phi, row_blk_size=blk_grid, col_blk_size=blk_ao)
1051 10 : CALL dbcsr_distribution_get(dist_phi, row_dist=dist_row_grid, col_dist=dist_col_ao)
1052 :
1053 : ! Determine number of MPI process columns
1054 38 : npcol = MAXVAL(dist_col_ao) + 1
1055 :
1056 : ! Build a perfectly safe column distribution for the Grid dimension
1057 30 : ALLOCATE (dist_col_grid(SIZE(blk_grid)))
1058 40 : DO i = 1, SIZE(blk_grid)
1059 40 : dist_col_grid(i) = MOD(i - 1, npcol)
1060 : END DO
1061 :
1062 : CALL dbcsr_distribution_new(dist_grid_grid, template=dist_phi, &
1063 10 : row_dist=dist_row_grid, col_dist=dist_col_grid)
1064 :
1065 10 : CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1066 10 : CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1067 10 : CALL dbcsr_create(matrix_chi_grid, "chi_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1068 10 : CALL dbcsr_create(matrix_chi_grid_spin, "chi_grid_spin", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1069 :
1070 : ! =========================================================================
1071 : ! 2. MAIN IMAGINARY TIME LOOP
1072 : ! =========================================================================
1073 120 : DO i_t = 1, bs_env%num_time_freq_points
1074 110 : t1 = m_walltime()
1075 :
1076 110 : tau = bs_env%imag_time_points(i_t)
1077 110 : CALL dbcsr_set(matrix_chi_grid, 0.0_dp)
1078 :
1079 : ! ----------------------------------------------------------------------
1080 : ! A. SPIN LOOP (Allocations safely encapsulated in wrappers)
1081 : ! ----------------------------------------------------------------------
1082 240 : DO ispin = 1, bs_env%n_spin
1083 :
1084 : ! G^occ_µλ(i|τ|,k=0) = sum_n^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1085 : ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
1086 : CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, &
1087 130 : matrix_G_occ_grid, bs_env%eps_filter)
1088 :
1089 : ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1090 : ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
1091 : CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, &
1092 130 : matrix_G_vir_grid, bs_env%eps_filter)
1093 :
1094 : ! -------------------------------------------------------------------
1095 : ! B. ELEMENT-WISE HADAMARD PRODUCT
1096 : ! -------------------------------------------------------------------
1097 : ! χ_ll'(iτ,k=0) = G^occ_ll'(i|τ|,k=0) * G^vir_ll'(i|τ|,k=0)
1098 130 : CALL hadamard_product(matrix_G_occ_grid, matrix_G_vir_grid, matrix_chi_grid_spin, bs_env%spin_degeneracy)
1099 :
1100 : ! Accumulate spin contributions
1101 240 : CALL dbcsr_add(matrix_chi_grid, matrix_chi_grid_spin, 1.0_dp, 1.0_dp)
1102 :
1103 : END DO ! ispin
1104 :
1105 : ! ----------------------------------------------------------------------
1106 : ! C. TRANSFORM TO AUXILIARY BASIS & EXPORT DIRECTLY
1107 : ! χ_aux = Z^T * χ_grid * Z
1108 : ! χ_PQ(iτ,k=0) = sum_ll' Z_lP χ_ll'(iτ,k=0) Z_l'Q
1109 : ! Result is dumped directly into the final array mat_chi_Gamma_tau!
1110 : ! ----------------------------------------------------------------------
1111 : CALL contract_A_B_A("T", "N", mat_Z_lP, matrix_chi_grid, &
1112 110 : mat_chi_Gamma_tau(i_t)%matrix, bs_env%eps_filter)
1113 :
1114 120 : IF (bs_env%unit_nr > 0) THEN
1115 : WRITE (bs_env%unit_nr, '(T2,A,I13,A,I3,A,F7.1,A)') &
1116 55 : 'Computed χ(iτ,k=0) for time point', i_t, ' /', bs_env%num_time_freq_points, &
1117 110 : ', Execution time', m_walltime() - t1, ' s'
1118 : END IF
1119 :
1120 : END DO ! i_t
1121 :
1122 : ! =========================================================================
1123 : ! 3. FINAL CLEANUP
1124 : ! =========================================================================
1125 10 : CALL dbcsr_release(matrix_G_occ_grid)
1126 10 : CALL dbcsr_release(matrix_G_vir_grid)
1127 10 : CALL dbcsr_release(matrix_chi_grid)
1128 10 : CALL dbcsr_release(matrix_chi_grid_spin)
1129 10 : CALL dbcsr_distribution_release(dist_grid_grid)
1130 10 : DEALLOCATE (dist_col_grid)
1131 :
1132 10 : IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
1133 :
1134 10 : CALL timestop(handle)
1135 :
1136 20 : END SUBROUTINE get_mat_chi_Gamma_tau
1137 :
1138 : ! **************************************************************************************************
1139 : !> \brief Computes Green's Function in grid basis
1140 : !> \param bs_env ...
1141 : !> \param tau ...
1142 : !> \param ispin ...
1143 : !> \param occ ...
1144 : !> \param vir ...
1145 : !> \param mat_phi_mu_l ...
1146 : !> \param matrix_G_grid ...
1147 : !> \param eps_filter ...
1148 : ! **************************************************************************************************
1149 :
1150 1064 : SUBROUTINE build_G_grid(bs_env, tau, ispin, occ, vir, mat_phi_mu_l, matrix_G_grid, eps_filter)
1151 :
1152 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1153 : REAL(KIND=dp), INTENT(IN) :: tau
1154 : INTEGER, INTENT(IN) :: ispin
1155 : LOGICAL, INTENT(IN) :: occ, vir
1156 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, matrix_G_grid
1157 : REAL(KIND=dp), INTENT(IN) :: eps_filter
1158 :
1159 : CHARACTER(LEN=*), PARAMETER :: routineN = 'build_G_grid'
1160 :
1161 : INTEGER :: handle
1162 532 : INTEGER, DIMENSION(:), POINTER :: blk_ao, dist_row_ao
1163 : TYPE(cp_fm_type), POINTER :: fm_G
1164 : TYPE(dbcsr_distribution_type) :: dist_ao_ao
1165 : TYPE(dbcsr_type) :: matrix_G_ao
1166 :
1167 532 : CALL timeset(routineN, handle)
1168 :
1169 : ! 1. Select the correct FM matrix based on occ/vir flags
1170 532 : IF (occ) THEN
1171 272 : fm_G => bs_env%fm_Gocc
1172 : ELSE
1173 260 : fm_G => bs_env%fm_Gvir
1174 : END IF
1175 :
1176 : ! 2. Compute Dense FM Green's Function
1177 : ! G^occ/vir_µλ(i|τ|,k=0) = sum_G^occ/vir_µλn^occ/vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1178 532 : CALL G_occ_vir(bs_env, tau, fm_G, ispin, occ=occ, vir=vir)
1179 :
1180 : ! 3. Setup AO DBCSR Topology and Create Matrix dynamically
1181 532 : CALL setup_square_topology(mat_phi_mu_l, 'COL', dist_ao_ao, blk_ao, dist_row_ao)
1182 :
1183 : CALL dbcsr_create(matrix_G_ao, name="G_ao", dist=dist_ao_ao, &
1184 : matrix_type=dbcsr_type_no_symmetry, &
1185 532 : row_blk_size=blk_ao, col_blk_size=blk_ao)
1186 :
1187 : ! 4. Convert FM to Sparse DBCSR
1188 532 : CALL copy_fm_to_dbcsr(fm_G, matrix_G_ao, keep_sparsity=.FALSE.)
1189 :
1190 : ! 5. Transform to Grid Basis: G_grid = phi * G_ao * phi^T
1191 : ! G^occ/vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ/vir_µν Φ_ν(r_l')
1192 532 : CALL contract_A_B_A("N", "T", mat_phi_mu_l, matrix_G_ao, matrix_G_grid, eps_filter)
1193 :
1194 : ! 6. Release AO matrix and topology
1195 532 : CALL release_dbcsr_topology_and_matrices(dist=dist_ao_ao, mapped_dist=dist_row_ao, m1=matrix_G_ao)
1196 :
1197 532 : CALL timestop(handle)
1198 :
1199 532 : END SUBROUTINE build_G_grid
1200 :
1201 : ! **************************************************************************************************
1202 : !> \brief Generalized routine to compute OUT = A * B * A^T OR OUT = A^T * B * A using DBCSR
1203 : !> \param transA_left ...
1204 : !> \param transA_right ...
1205 : !> \param matrix_A ...
1206 : !> \param matrix_B ...
1207 : !> \param matrix_out ...
1208 : !> \param eps_filter ...
1209 : ! **************************************************************************************************
1210 :
1211 1034 : SUBROUTINE contract_A_B_A(transA_left, transA_right, matrix_A, matrix_B, matrix_out, eps_filter)
1212 :
1213 : CHARACTER(LEN=1), INTENT(IN) :: transA_left, transA_right
1214 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_A, matrix_B, matrix_out
1215 : REAL(KIND=dp), INTENT(IN) :: eps_filter
1216 :
1217 : CHARACTER(LEN=*), PARAMETER :: routineN = 'contract_A_B_A'
1218 :
1219 : INTEGER :: handle
1220 : TYPE(dbcsr_type) :: matrix_tmp
1221 :
1222 1034 : CALL timeset(routineN, handle)
1223 :
1224 1034 : CALL dbcsr_create(matrix_tmp, template=matrix_A)
1225 :
1226 1034 : IF (transA_left == "N" .AND. transA_right == "T") THEN
1227 : ! Path 1: Out = A * B * A^T
1228 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_A, matrix_B, &
1229 652 : 0.0_dp, matrix_tmp, filter_eps=eps_filter)
1230 : CALL dbcsr_multiply("N", "T", 1.0_dp, matrix_tmp, matrix_A, &
1231 652 : 0.0_dp, matrix_out, filter_eps=eps_filter)
1232 :
1233 382 : ELSE IF (transA_left == "T" .AND. transA_right == "N") THEN
1234 : ! Path 2: Out = A^T * B * A
1235 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_B, matrix_A, &
1236 382 : 0.0_dp, matrix_tmp, filter_eps=eps_filter)
1237 : CALL dbcsr_multiply("T", "N", 1.0_dp, matrix_A, matrix_tmp, &
1238 382 : 0.0_dp, matrix_out, filter_eps=eps_filter)
1239 : ELSE
1240 0 : CPABORT("Unsupported transposition pair in contract_A_B_A")
1241 : END IF
1242 :
1243 1034 : CALL dbcsr_release(matrix_tmp)
1244 :
1245 1034 : CALL timestop(handle)
1246 :
1247 1034 : END SUBROUTINE contract_A_B_A
1248 :
1249 : ! **************************************************************************************************
1250 : !> \brief Computes C = A ◦ B (Element-wise Hadamard product) for sparse DBCSR matrices.
1251 : !> \param matrix_A ...
1252 : !> \param matrix_B ...
1253 : !> \param matrix_C ...
1254 : !> \param fac (Scaling factor applied to the product)
1255 : ! **************************************************************************************************
1256 :
1257 804 : SUBROUTINE hadamard_product(matrix_A, matrix_B, matrix_C, fac)
1258 :
1259 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_A, matrix_B, matrix_C
1260 : REAL(KIND=dp), INTENT(IN) :: fac
1261 :
1262 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hadamard_product'
1263 :
1264 : INTEGER :: col, handle, row
1265 : LOGICAL :: found
1266 402 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: blk_B, blk_C
1267 : TYPE(dbcsr_iterator_type) :: iter
1268 :
1269 402 : CALL timeset(routineN, handle)
1270 :
1271 402 : CALL dbcsr_copy(matrix_C, matrix_A)
1272 :
1273 402 : CALL dbcsr_iterator_start(iter, matrix_C)
1274 2211 : DO WHILE (dbcsr_iterator_blocks_left(iter))
1275 1809 : CALL dbcsr_iterator_next_block(iter, row, col, blk_C)
1276 :
1277 1809 : CALL dbcsr_get_block_p(matrix_B, row, col, blk_B, found)
1278 :
1279 2211 : IF (found) THEN
1280 159523682 : blk_C(:, :) = fac*blk_C(:, :)*blk_B(:, :)
1281 : ELSE
1282 : ! If B is sparse here, the product is zero
1283 0 : blk_C(:, :) = 0.0_dp
1284 : END IF
1285 : END DO
1286 402 : CALL dbcsr_iterator_stop(iter)
1287 :
1288 402 : CALL timestop(handle)
1289 :
1290 402 : END SUBROUTINE hadamard_product
1291 :
1292 : ! **************************************************************************************************
1293 : !> \brief Computes the exact exchange part of the GW self-energy
1294 : !> \param bs_env ...
1295 : !> \param qs_env ...
1296 : !> \param mat_phi_mu_l ...
1297 : !> \param mat_Z_lP ...
1298 : !> \param fm_Sigma_x_Gamma ...
1299 : ! **************************************************************************************************
1300 :
1301 10 : SUBROUTINE compute_Sigma_x(bs_env, qs_env, mat_phi_mu_l, mat_Z_lP, fm_Sigma_x_Gamma)
1302 :
1303 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1304 : TYPE(qs_environment_type), POINTER :: qs_env
1305 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_Z_lP
1306 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_Sigma_x_Gamma
1307 :
1308 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_Sigma_x'
1309 :
1310 : INTEGER :: handle, ispin
1311 10 : INTEGER, DIMENSION(:), POINTER :: blk_aux, blk_grid, dist_col_grid, &
1312 10 : dist_row_aux
1313 : REAL(KIND=dp) :: t1
1314 10 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: fm_Vtr_Gamma
1315 : TYPE(dbcsr_distribution_type) :: dist_aux_aux, dist_grid_grid
1316 : TYPE(dbcsr_type) :: mat_Sigma_x_Gamma, matrix_D_grid, &
1317 : matrix_Sigma_x_grid, matrix_V_aux, &
1318 : matrix_V_grid
1319 :
1320 10 : CALL timeset(routineN, handle)
1321 :
1322 10 : t1 = m_walltime()
1323 :
1324 42 : ALLOCATE (fm_Sigma_x_Gamma(bs_env%n_spin))
1325 22 : DO ispin = 1, bs_env%n_spin
1326 22 : CALL cp_fm_create(fm_Sigma_x_Gamma(ispin), bs_env%fm_s_Gamma%matrix_struct)
1327 : END DO
1328 :
1329 10 : CALL dbcsr_create(mat_Sigma_x_Gamma, template=bs_env%mat_ao_ao%matrix)
1330 :
1331 : ! =========================================================================
1332 : ! 1. SETUP CORE TOPOLOGIES
1333 : ! =========================================================================
1334 10 : CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
1335 10 : CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
1336 :
1337 : ! =========================================================================
1338 : ! 2. COMPUTE V^tr_ll'
1339 : ! =========================================================================
1340 : CALL RI_2c_integral_mat(qs_env, fm_Vtr_Gamma, bs_env%fm_RI_RI, bs_env%n_RI, &
1341 10 : bs_env%trunc_coulomb, do_kpoints=.FALSE.)
1342 :
1343 : ! M^-1 * V^tr * M^-1 directly modifies fm_Vtr_Gamma(:, 1)
1344 10 : CALL multiply_fm_W_MIC_time_with_Minv_Gamma(bs_env, qs_env, fm_Vtr_Gamma(:, 1))
1345 :
1346 10 : CALL dbcsr_create(matrix_V_aux, "V_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
1347 10 : CALL dbcsr_create(matrix_V_grid, "V_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1348 :
1349 10 : CALL copy_fm_to_dbcsr(fm_Vtr_Gamma(1, 1), matrix_V_aux, keep_sparsity=.FALSE.)
1350 :
1351 : ! V^tr_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
1352 10 : CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_V_aux, matrix_V_grid, bs_env%eps_filter)
1353 10 : CALL dbcsr_release(matrix_V_aux)
1354 :
1355 : ! =========================================================================
1356 : ! 3. SPIN LOOP FOR EXACT EXCHANGE
1357 : ! =========================================================================
1358 22 : DO ispin = 1, bs_env%n_spin
1359 :
1360 : ! Density matrix on grid is essentially G_occ at tau = 0.0
1361 : ! D_µν = sum_n^occ C_µn(k=0) C_νn(k=0)
1362 : ! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
1363 12 : CALL dbcsr_create(matrix_D_grid, "D_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1364 12 : CALL build_G_grid(bs_env, 0.0_dp, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_D_grid, bs_env%eps_filter)
1365 :
1366 : ! Element-wise Hadamard product: Σ^x_grid = D_grid ◦ V_grid
1367 : ! Σ^x_ll' = D_ll' * V^tr_ll'
1368 12 : CALL dbcsr_create(matrix_Sigma_x_grid, template=matrix_V_grid)
1369 12 : CALL hadamard_product(matrix_D_grid, matrix_V_grid, matrix_Sigma_x_grid, 1.0_dp)
1370 :
1371 12 : CALL dbcsr_release(matrix_D_grid)
1372 :
1373 : ! Transform back to AO basis: Σ^x_ao = -1.0 * phi^T * Σ^x_grid * phi
1374 : ! Σ^x_λσ(k=0) = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
1375 12 : CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_x_grid, mat_Sigma_x_Gamma, bs_env%eps_filter)
1376 12 : CALL dbcsr_scale(mat_Sigma_x_Gamma, -1.0_dp)
1377 :
1378 12 : CALL dbcsr_release(matrix_Sigma_x_grid)
1379 :
1380 : ! Data I/O and Export to CP2K Full Matrices
1381 22 : CALL copy_dbcsr_to_fm(mat_Sigma_x_Gamma, fm_Sigma_x_Gamma(ispin))
1382 :
1383 : END DO ! ispin
1384 :
1385 10 : IF (bs_env%unit_nr > 0) THEN
1386 : WRITE (bs_env%unit_nr, '(T2,A,T58,A,F7.1,A)') &
1387 5 : 'Computed Σ^x(k=0),', ' Execution time', m_walltime() - t1, ' s'
1388 5 : WRITE (bs_env%unit_nr, '(A)') ' '
1389 : END IF
1390 :
1391 : ! =========================================================================
1392 : ! 4. CLEANUP
1393 : ! =========================================================================
1394 : CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid, &
1395 10 : m1=mat_Sigma_x_Gamma, m2=matrix_V_grid)
1396 10 : CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
1397 :
1398 10 : CALL cp_fm_release(fm_Vtr_Gamma)
1399 :
1400 10 : CALL timestop(handle)
1401 :
1402 30 : END SUBROUTINE compute_Sigma_x
1403 :
1404 : ! **************************************************************************************************
1405 : !> \brief Computes the correlation part of the GW self-energy
1406 : !> \param bs_env ...
1407 : !> \param fm_W_MIC_time ...
1408 : !> \param mat_phi_mu_l ...
1409 : !> \param mat_Z_lP ...
1410 : !> \param fm_Sigma_c_Gamma_time ...
1411 : ! **************************************************************************************************
1412 :
1413 10 : SUBROUTINE compute_Sigma_c(bs_env, fm_W_MIC_time, mat_phi_mu_l, mat_Z_lP, fm_Sigma_c_Gamma_time)
1414 :
1415 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1416 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_W_MIC_time
1417 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_Z_lP
1418 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: fm_Sigma_c_Gamma_time
1419 :
1420 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_Sigma_c'
1421 :
1422 : INTEGER :: handle, i_t, ispin
1423 10 : INTEGER, DIMENSION(:), POINTER :: blk_aux, blk_grid, dist_col_grid, &
1424 10 : dist_row_aux
1425 : REAL(KIND=dp) :: t1, tau
1426 : TYPE(dbcsr_distribution_type) :: dist_aux_aux, dist_grid_grid
1427 10 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: mat_Sigma_neg_tau, mat_Sigma_pos_tau
1428 : TYPE(dbcsr_type) :: matrix_G_occ_grid, matrix_G_vir_grid, matrix_Sigma_neg_grid, &
1429 : matrix_Sigma_pos_grid, matrix_W_aux, matrix_W_grid
1430 :
1431 10 : CALL timeset(routineN, handle)
1432 :
1433 : ! =========================================================================
1434 : ! 1. SETUP CORE TOPOLOGIES AND PRE-ALLOCATE OUTPUT ARRAYS
1435 : ! =========================================================================
1436 10 : CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
1437 10 : CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
1438 :
1439 : ! Pre-allocate local DBCSR matrices to act as targets for final output
1440 10 : NULLIFY (mat_Sigma_neg_tau, mat_Sigma_pos_tau)
1441 182 : ALLOCATE (mat_Sigma_neg_tau(bs_env%num_time_freq_points, bs_env%n_spin))
1442 182 : ALLOCATE (mat_Sigma_pos_tau(bs_env%num_time_freq_points, bs_env%n_spin))
1443 :
1444 120 : DO i_t = 1, bs_env%num_time_freq_points
1445 250 : DO ispin = 1, bs_env%n_spin
1446 130 : ALLOCATE (mat_Sigma_neg_tau(i_t, ispin)%matrix)
1447 130 : ALLOCATE (mat_Sigma_pos_tau(i_t, ispin)%matrix)
1448 130 : CALL dbcsr_create(mat_Sigma_neg_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
1449 240 : CALL dbcsr_create(mat_Sigma_pos_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
1450 : END DO
1451 : END DO
1452 :
1453 : ! =========================================================================
1454 : ! 2. MAIN IMAGINARY TIME LOOP
1455 : ! =========================================================================
1456 120 : DO i_t = 1, bs_env%num_time_freq_points
1457 110 : tau = bs_env%imag_time_points(i_t)
1458 :
1459 : ! -------------------------------------------------------------------
1460 : ! Compute W_grid = Z * W_aux * Z^T
1461 : ! -------------------------------------------------------------------
1462 110 : CALL dbcsr_create(matrix_W_aux, "W_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
1463 110 : CALL dbcsr_create(matrix_W_grid, "W_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1464 :
1465 110 : CALL copy_fm_to_dbcsr(fm_W_MIC_time(i_t), matrix_W_aux, keep_sparsity=.FALSE.)
1466 :
1467 : ! W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
1468 110 : CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_W_aux, matrix_W_grid, bs_env%eps_filter)
1469 :
1470 110 : CALL dbcsr_release(matrix_W_aux) ! Clean up aux basis immediately
1471 :
1472 240 : DO ispin = 1, bs_env%n_spin
1473 130 : t1 = m_walltime()
1474 :
1475 : ! -------------------------------------------------------------------
1476 : ! A. Transform Green's Functions to the Grid
1477 : ! -------------------------------------------------------------------
1478 130 : CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1479 130 : CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1480 :
1481 : ! G^occ_µλ(i|τ|,k=0) = sum_G^occ_µλn^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1482 : ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
1483 130 : CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_G_occ_grid, bs_env%eps_filter)
1484 :
1485 : ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1486 : ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
1487 130 : CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, matrix_G_vir_grid, bs_env%eps_filter)
1488 :
1489 : ! -------------------------------------------------------------------
1490 : ! B. Element-wise Hadamard Products for Sigma_c on Grid
1491 : ! Σ_neg_grid = G_occ_grid ◦ W_grid
1492 : ! Σ_pos_grid = G_vir_grid ◦ W_grid
1493 : ! -------------------------------------------------------------------
1494 130 : CALL dbcsr_create(matrix_Sigma_neg_grid, template=matrix_W_grid)
1495 130 : CALL dbcsr_create(matrix_Sigma_pos_grid, template=matrix_W_grid)
1496 :
1497 : ! Σ^c_ll'(iτ,k=0) = -G^occ_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ < 0
1498 130 : CALL hadamard_product(matrix_G_occ_grid, matrix_W_grid, matrix_Sigma_neg_grid, 1.0_dp)
1499 :
1500 : ! Σ^c_ll'(iτ,k=0) = G^vir_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ > 0
1501 130 : CALL hadamard_product(matrix_G_vir_grid, matrix_W_grid, matrix_Sigma_pos_grid, 1.0_dp)
1502 :
1503 : ! Instantly purge massive G_grid arrays to save memory
1504 130 : CALL dbcsr_release(matrix_G_occ_grid)
1505 130 : CALL dbcsr_release(matrix_G_vir_grid)
1506 :
1507 : ! -------------------------------------------------------------------
1508 : ! C. Transform Sigma back to AO Basis
1509 : ! Σ_AO = phi^T * Σ_grid * phi
1510 : ! -------------------------------------------------------------------
1511 :
1512 : ! Σ^c_λσ(iτ,k=0) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ < 0
1513 : CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_neg_grid, &
1514 130 : mat_Sigma_neg_tau(i_t, ispin)%matrix, bs_env%eps_filter)
1515 130 : CALL dbcsr_scale(mat_Sigma_neg_tau(i_t, ispin)%matrix, -1.0_dp)
1516 :
1517 : ! Σ^c_λσ(iτ,k=0) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ > 0
1518 : CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_pos_grid, &
1519 130 : mat_Sigma_pos_tau(i_t, ispin)%matrix, bs_env%eps_filter)
1520 :
1521 : ! Purge Grid Sigma arrays
1522 130 : CALL dbcsr_release(matrix_Sigma_neg_grid)
1523 130 : CALL dbcsr_release(matrix_Sigma_pos_grid)
1524 :
1525 240 : IF (bs_env%unit_nr > 0) THEN
1526 : WRITE (bs_env%unit_nr, '(T2,A,I10,A,I3,A,F7.1,A)') &
1527 65 : 'Computed Σ^c(iτ,k=0) for time point ', i_t, ' /', bs_env%num_time_freq_points, &
1528 130 : ', Execution time', m_walltime() - t1, ' s'
1529 : END IF
1530 :
1531 : END DO ! ispin
1532 :
1533 : ! Release the W_grid for this time point
1534 120 : CALL dbcsr_release(matrix_W_grid)
1535 :
1536 : END DO ! i_t
1537 :
1538 10 : IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
1539 :
1540 : ! -------------------------------------------------------------------------
1541 : ! 3. FINALIZE AND CLEANUP
1542 : ! -------------------------------------------------------------------------
1543 : CALL fill_fm_Sigma_c_Gamma_time(fm_Sigma_c_Gamma_time, bs_env, &
1544 10 : mat_Sigma_pos_tau, mat_Sigma_neg_tau)
1545 :
1546 10 : CALL cp_fm_release(fm_W_MIC_time)
1547 :
1548 10 : CALL dbcsr_deallocate_matrix_set(mat_Sigma_neg_tau)
1549 10 : CALL dbcsr_deallocate_matrix_set(mat_Sigma_pos_tau)
1550 :
1551 10 : CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid)
1552 10 : CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
1553 :
1554 10 : CALL delete_unnecessary_files(bs_env)
1555 10 : CALL timestop(handle)
1556 :
1557 10 : END SUBROUTINE compute_Sigma_c
1558 :
1559 : ! **************************************************************************************************
1560 : !> \brief DBCSR Topology Generation
1561 : !> \param matrix_template ...
1562 : !> \param dim_type ...
1563 : !> \param square_dist ...
1564 : !> \param blk_sizes ...
1565 : !> \param mapped_dist ...
1566 : ! **************************************************************************************************
1567 :
1568 572 : SUBROUTINE setup_square_topology(matrix_template, dim_type, square_dist, blk_sizes, mapped_dist)
1569 :
1570 : TYPE(dbcsr_type), INTENT(IN) :: matrix_template
1571 : CHARACTER(LEN=*), INTENT(IN) :: dim_type
1572 : TYPE(dbcsr_distribution_type), INTENT(OUT) :: square_dist
1573 : INTEGER, DIMENSION(:), INTENT(OUT), POINTER :: blk_sizes, mapped_dist
1574 :
1575 : INTEGER :: i, np
1576 572 : INTEGER, DIMENSION(:), POINTER :: col_blk, col_dist, row_blk, row_dist
1577 : TYPE(dbcsr_distribution_type) :: dist_template
1578 :
1579 : CALL dbcsr_get_info(matrix_template, distribution=dist_template, &
1580 572 : row_blk_size=row_blk, col_blk_size=col_blk)
1581 572 : CALL dbcsr_distribution_get(dist_template, row_dist=row_dist, col_dist=col_dist)
1582 :
1583 572 : IF (TRIM(dim_type) == 'ROW') THEN
1584 : ! Creates ROW x ROW (e.g., Grid x Grid from mat_phi_mu_l)
1585 20 : blk_sizes => row_blk
1586 76 : np = MAXVAL(col_dist) + 1 ! npcol
1587 60 : ALLOCATE (mapped_dist(SIZE(blk_sizes)))
1588 80 : DO i = 1, SIZE(blk_sizes)
1589 80 : mapped_dist(i) = MOD(i - 1, np)
1590 : END DO
1591 : CALL dbcsr_distribution_new(square_dist, template=dist_template, &
1592 20 : row_dist=row_dist, col_dist=mapped_dist)
1593 :
1594 552 : ELSE IF (TRIM(dim_type) == 'COL') THEN
1595 : ! Creates COL x COL (e.g., Aux x Aux from mat_Z_lP)
1596 552 : blk_sizes => col_blk
1597 2208 : np = MAXVAL(row_dist) + 1 ! nprow
1598 1656 : ALLOCATE (mapped_dist(SIZE(blk_sizes)))
1599 2040 : DO i = 1, SIZE(blk_sizes)
1600 2040 : mapped_dist(i) = MOD(i - 1, np)
1601 : END DO
1602 : CALL dbcsr_distribution_new(square_dist, template=dist_template, &
1603 552 : row_dist=mapped_dist, col_dist=col_dist)
1604 : END IF
1605 :
1606 572 : END SUBROUTINE setup_square_topology
1607 :
1608 : ! **************************************************************************************************
1609 : !> \brief DBCSR matrices deallocation
1610 : !> \param dist ...
1611 : !> \param mapped_dist ...
1612 : !> \param m1 ...
1613 : !> \param m2 ...
1614 : !> \param m3 ...
1615 : !> \param m4 ...
1616 : ! **************************************************************************************************
1617 :
1618 572 : SUBROUTINE release_dbcsr_topology_and_matrices(dist, mapped_dist, m1, m2, m3, m4)
1619 :
1620 : TYPE(dbcsr_distribution_type), INTENT(INOUT), &
1621 : OPTIONAL :: dist
1622 : INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL, &
1623 : POINTER :: mapped_dist
1624 : TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL :: m1, m2, m3, m4
1625 :
1626 572 : IF (PRESENT(dist)) CALL dbcsr_distribution_release(dist)
1627 572 : IF (PRESENT(mapped_dist)) THEN
1628 572 : IF (ASSOCIATED(mapped_dist)) THEN
1629 572 : DEALLOCATE (mapped_dist)
1630 : NULLIFY (mapped_dist)
1631 : END IF
1632 : END IF
1633 572 : IF (PRESENT(m1)) CALL dbcsr_release(m1)
1634 572 : IF (PRESENT(m2)) CALL dbcsr_release(m2)
1635 572 : IF (PRESENT(m3)) CALL dbcsr_release(m3)
1636 572 : IF (PRESENT(m4)) CALL dbcsr_release(m4)
1637 :
1638 572 : END SUBROUTINE release_dbcsr_topology_and_matrices
1639 :
1640 : END MODULE gw_large_cell_Gamma_ri_rs
|