Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief RI-methods for HFX and K-points.
10 : !> \auhtor Augustin Bussy (01.2023)
11 : ! **************************************************************************************************
12 :
13 : MODULE hfx_ri_kp
14 : USE admm_types, ONLY: get_admm_env
15 : USE atomic_kind_types, ONLY: atomic_kind_type,&
16 : get_atomic_kind_set
17 : USE basis_set_types, ONLY: get_gto_basis_set,&
18 : gto_basis_set_p_type
19 : USE bibliography, ONLY: Bussy2024,&
20 : cite_reference
21 : USE cell_types, ONLY: cell_type,&
22 : pbc,&
23 : real_to_scaled,&
24 : scaled_to_real
25 : USE cp_array_utils, ONLY: cp_1d_logical_p_type,&
26 : cp_2d_r_p_type,&
27 : cp_3d_r_p_type
28 : USE cp_blacs_env, ONLY: cp_blacs_env_create,&
29 : cp_blacs_env_release,&
30 : cp_blacs_env_type
31 : USE cp_control_types, ONLY: dft_control_type
32 : USE cp_dbcsr_api, ONLY: &
33 : dbcsr_add, dbcsr_clear, dbcsr_copy, dbcsr_create, dbcsr_distribution_get, &
34 : dbcsr_distribution_new, dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_filter, &
35 : dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, dbcsr_iterator_blocks_left, &
36 : dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
37 : dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_type, dbcsr_type_no_symmetry, &
38 : dbcsr_type_symmetric
39 : USE cp_dbcsr_cholesky, ONLY: cp_dbcsr_cholesky_decompose,&
40 : cp_dbcsr_cholesky_invert
41 : USE cp_dbcsr_contrib, ONLY: dbcsr_dot
42 : USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_alloc_block_from_nbl
43 : USE cp_dbcsr_diag, ONLY: cp_dbcsr_power
44 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_dist2d_to_dist
45 : USE dbt_api, ONLY: &
46 : dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
47 : dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, dbt_destroy, &
48 : dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, dbt_filter, &
49 : dbt_finalize, dbt_get_block, dbt_get_info, dbt_get_stored_coordinates, &
50 : dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
51 : dbt_iterator_type, dbt_mp_environ_pgrid, dbt_pgrid_create, dbt_pgrid_destroy, &
52 : dbt_pgrid_type, dbt_put_block, dbt_scale, dbt_type
53 : USE distribution_2d_types, ONLY: distribution_2d_release,&
54 : distribution_2d_type
55 : USE hfx_ri, ONLY: get_idx_to_atom,&
56 : hfx_ri_pre_scf_calc_tensors
57 : USE hfx_types, ONLY: hfx_ri_type
58 : USE input_constants, ONLY: do_potential_short,&
59 : hfx_ri_do_2c_cholesky,&
60 : hfx_ri_do_2c_diag,&
61 : hfx_ri_do_2c_iter
62 : USE input_cp2k_hfx, ONLY: ri_pmat
63 : USE input_section_types, ONLY: section_vals_get_subs_vals,&
64 : section_vals_type,&
65 : section_vals_val_get,&
66 : section_vals_val_set
67 : USE iterate_matrix, ONLY: invert_hotelling
68 : USE kinds, ONLY: default_string_length,&
69 : dp,&
70 : int_8
71 : USE kpoint_types, ONLY: get_kpoint_info,&
72 : kpoint_type
73 : USE libint_2c_3c, ONLY: cutoff_screen_factor
74 : USE machine, ONLY: m_flush,&
75 : m_memory,&
76 : m_walltime
77 : USE mathlib, ONLY: erfc_cutoff
78 : USE message_passing, ONLY: mp_cart_type,&
79 : mp_para_env_type,&
80 : mp_request_type,&
81 : mp_waitall
82 : USE particle_methods, ONLY: get_particle_set
83 : USE particle_types, ONLY: particle_type
84 : USE physcon, ONLY: angstrom
85 : USE qs_environment_types, ONLY: get_qs_env,&
86 : qs_environment_type
87 : USE qs_force_types, ONLY: qs_force_type
88 : USE qs_integral_utils, ONLY: basis_set_list_setup
89 : USE qs_interactions, ONLY: init_interaction_radii_orb_basis
90 : USE qs_kind_types, ONLY: qs_kind_type
91 : USE qs_neighbor_list_types, ONLY: get_iterator_info,&
92 : neighbor_list_iterate,&
93 : neighbor_list_iterator_create,&
94 : neighbor_list_iterator_p_type,&
95 : neighbor_list_iterator_release,&
96 : neighbor_list_set_p_type,&
97 : release_neighbor_list_sets
98 : USE qs_scf_types, ONLY: qs_scf_env_type
99 : USE qs_tensors, ONLY: &
100 : build_2c_derivatives, build_2c_neighbor_lists, build_3c_derivatives, &
101 : build_3c_neighbor_lists, get_3c_iterator_info, get_tensor_occupancy, &
102 : neighbor_list_3c_destroy, neighbor_list_3c_iterate, neighbor_list_3c_iterator_create, &
103 : neighbor_list_3c_iterator_destroy
104 : USE qs_tensors_types, ONLY: create_2c_tensor,&
105 : create_3c_tensor,&
106 : create_tensor_batches,&
107 : distribution_2d_create,&
108 : distribution_3d_create,&
109 : distribution_3d_type,&
110 : neighbor_list_3c_iterator_type,&
111 : neighbor_list_3c_type
112 : USE util, ONLY: get_limit
113 : USE virial_types, ONLY: virial_type
114 : #include "./base/base_uses.f90"
115 :
116 : !$ USE OMP_LIB, ONLY: omp_get_num_threads
117 :
118 : IMPLICIT NONE
119 : PRIVATE
120 :
121 : PUBLIC :: hfx_ri_update_ks_kp, hfx_ri_update_forces_kp
122 :
123 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri_kp'
124 : CONTAINS
125 :
126 : ! **************************************************************************************************
127 : !> \brief I_1nitialize the ri_data for K-point. For now, we take the normal, usual existing ri_data
128 : !> and we adapt it to our needs
129 : !> \param dbcsr_template ...
130 : !> \param ri_data ...
131 : !> \param qs_env ...
132 : ! **************************************************************************************************
133 88 : SUBROUTINE adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
134 : TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
135 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
136 : TYPE(qs_environment_type), POINTER :: qs_env
137 :
138 : INTEGER :: i_img, i_RI, i_spin, iatom, natom, &
139 : nblks_RI, nimg, nkind, nspins
140 88 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, dist1, dist2, dist3
141 : TYPE(dft_control_type), POINTER :: dft_control
142 : TYPE(mp_para_env_type), POINTER :: para_env
143 :
144 88 : NULLIFY (dft_control, para_env)
145 :
146 : !The main thing that we need to do is to allocate more space for the integrals, such that there
147 : !is room for each periodic image. Note that we only go in 1D, i.e. we store (mu^0 sigma^a|P^0),
148 : !and (P^0|Q^a) => the RI basis is always in the main cell.
149 :
150 : !Get kpoint info
151 88 : CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, para_env=para_env, nkind=nkind)
152 88 : nimg = ri_data%nimg
153 :
154 : !Along the RI direction we have basis elements spread accross ncell_RI images.
155 88 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
156 264 : ALLOCATE (bsizes_RI_ext(nblks_RI*ri_data%ncell_RI))
157 642 : DO i_RI = 1, ri_data%ncell_RI
158 3264 : bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI_split(:)
159 : END DO
160 :
161 5676 : ALLOCATE (ri_data%t_3c_int_ctr_1(1, nimg))
162 : CALL create_3c_tensor(ri_data%t_3c_int_ctr_1(1, 1), dist1, dist2, dist3, &
163 : ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_RI_ext, &
164 88 : ri_data%bsizes_AO_split, [1, 2], [3], name="(AO RI | AO)")
165 :
166 2354 : DO i_img = 2, nimg
167 2354 : CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_1(1, i_img))
168 : END DO
169 88 : DEALLOCATE (dist1, dist2, dist3)
170 :
171 968 : ALLOCATE (ri_data%t_3c_int_ctr_2(1, 1))
172 : CALL create_3c_tensor(ri_data%t_3c_int_ctr_2(1, 1), dist1, dist2, dist3, &
173 : ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_RI_ext, &
174 88 : ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
175 88 : DEALLOCATE (dist1, dist2, dist3)
176 :
177 : !We use full block sizes for the 2c quantities
178 88 : DEALLOCATE (bsizes_RI_ext)
179 88 : nblks_RI = SIZE(ri_data%bsizes_RI)
180 264 : ALLOCATE (bsizes_RI_ext(nblks_RI*ri_data%ncell_RI))
181 642 : DO i_RI = 1, ri_data%ncell_RI
182 1750 : bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI(:)
183 : END DO
184 :
185 3784 : ALLOCATE (ri_data%t_2c_inv(1, natom), ri_data%t_2c_int(1, natom), ri_data%t_2c_pot(1, natom))
186 : CALL create_2c_tensor(ri_data%t_2c_inv(1, 1), dist1, dist2, ri_data%pgrid_2d, &
187 : bsizes_RI_ext, bsizes_RI_ext, &
188 88 : name="(RI | RI)")
189 88 : DEALLOCATE (dist1, dist2)
190 88 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, 1))
191 88 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1))
192 176 : DO iatom = 2, natom
193 88 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_inv(1, iatom))
194 88 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, iatom))
195 176 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, iatom))
196 : END DO
197 :
198 440 : ALLOCATE (ri_data%kp_cost(natom, natom, nimg))
199 16566 : ri_data%kp_cost = 0.0_dp
200 :
201 : !We store the density and KS matrix in tensor format
202 88 : nspins = dft_control%nspins
203 12212 : ALLOCATE (ri_data%rho_ao_t(nspins, nimg), ri_data%ks_t(nspins, nimg))
204 : CALL create_2c_tensor(ri_data%rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
205 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
206 88 : name="(AO | AO)")
207 88 : DEALLOCATE (dist1, dist2)
208 :
209 88 : CALL dbt_create(dbcsr_template, ri_data%ks_t(1, 1))
210 :
211 88 : IF (nspins == 2) THEN
212 26 : CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(2, 1))
213 26 : CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(2, 1))
214 : END IF
215 2354 : DO i_img = 2, nimg
216 4980 : DO i_spin = 1, nspins
217 2626 : CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(i_spin, i_img))
218 4892 : CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(i_spin, i_img))
219 : END DO
220 : END DO
221 :
222 264 : END SUBROUTINE adapt_ri_data_to_kp
223 :
224 : ! **************************************************************************************************
225 : !> \brief The pre-scf steps for RI-HFX k-points calculation. Namely the calculation of the integrals
226 : !> \param dbcsr_template ...
227 : !> \param ri_data ...
228 : !> \param qs_env ...
229 : ! **************************************************************************************************
230 88 : SUBROUTINE hfx_ri_pre_scf_kp(dbcsr_template, ri_data, qs_env)
231 : TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
232 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
233 : TYPE(qs_environment_type), POINTER :: qs_env
234 :
235 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_kp'
236 :
237 : INTEGER :: handle, i_img, iatom, natom, nimg, nkind
238 88 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: t_2c_op_pot, t_2c_op_RI
239 88 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
240 : TYPE(dft_control_type), POINTER :: dft_control
241 :
242 88 : NULLIFY (dft_control)
243 :
244 88 : CALL timeset(routineN, handle)
245 :
246 88 : CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, nkind=nkind)
247 :
248 88 : CALL cleanup_kp(ri_data)
249 :
250 : !We do all the checks on what we allow in this initial implementation
251 88 : IF (ri_data%flavor /= ri_pmat) CPABORT("K-points RI-HFX only with RHO flavor")
252 88 : IF (ri_data%same_op) ri_data%same_op = .FALSE. !force the full calculation with RI metric
253 88 : IF (ABS(ri_data%eps_pgf_orb - dft_control%qs_control%eps_pgf_orb) > 1.0E-16_dp) &
254 0 : CPABORT("RI%EPS_PGF_ORB and QS%EPS_PGF_ORB must be identical for RI-HFX k-points")
255 :
256 88 : CALL get_kp_and_ri_images(ri_data, qs_env)
257 88 : nimg = ri_data%nimg
258 :
259 : !Calculate the integrals
260 5060 : ALLOCATE (t_2c_op_pot(nimg), t_2c_op_RI(nimg))
261 5676 : ALLOCATE (t_3c_int(1, nimg))
262 88 : CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int, do_kpoints=.TRUE.)
263 :
264 : !Make sure the internals have the k-point format
265 88 : CALL adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
266 :
267 : !For each atom i, we calculate the inverse RI metric (P^0 | Q^0)^-1 without external bumping yet
268 : !Also store the off-diagonal integrals of the RI metric in case of forces, bumped from the left
269 264 : DO iatom = 1, natom
270 : CALL get_ext_2c_int(ri_data%t_2c_inv(1, iatom), t_2c_op_RI, iatom, iatom, 1, ri_data, qs_env, &
271 176 : do_inverse=.TRUE.)
272 : !for the forces:
273 : !off-diagonl RI metric bumped from the left
274 : CALL get_ext_2c_int(ri_data%t_2c_int(1, iatom), t_2c_op_RI, iatom, iatom, 1, ri_data, &
275 176 : qs_env, off_diagonal=.TRUE.)
276 176 : CALL apply_bump(ri_data%t_2c_int(1, iatom), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.FALSE.)
277 :
278 : !RI metric with bumped off-diagonal blocks (but not inverted), depumed from left and right
279 : CALL get_ext_2c_int(ri_data%t_2c_pot(1, iatom), t_2c_op_RI, iatom, iatom, 1, ri_data, qs_env, &
280 176 : do_inverse=.TRUE., skip_inverse=.TRUE.)
281 : CALL apply_bump(ri_data%t_2c_pot(1, iatom), iatom, ri_data, qs_env, from_left=.TRUE., &
282 264 : from_right=.TRUE., debump=.TRUE.)
283 :
284 : END DO
285 :
286 2442 : DO i_img = 1, nimg
287 2442 : CALL dbcsr_release(t_2c_op_RI(i_img))
288 : END DO
289 :
290 4884 : ALLOCATE (ri_data%kp_mat_2c_pot(1, nimg))
291 2442 : DO i_img = 1, nimg
292 2354 : CALL dbcsr_create(ri_data%kp_mat_2c_pot(1, i_img), template=t_2c_op_pot(i_img))
293 2354 : CALL dbcsr_copy(ri_data%kp_mat_2c_pot(1, i_img), t_2c_op_pot(i_img))
294 2442 : CALL dbcsr_release(t_2c_op_pot(i_img))
295 : END DO
296 :
297 : !reorder the 3c integrals such that empty images are bunched up together
298 88 : CALL reorder_3c_ints(t_3c_int(1, :), ri_data)
299 :
300 : !Pre-contract all 3c integrals with the bumped inverse RI metric (P^0|Q^0)^-1,
301 : !and store in ri_data%t_3c_int_ctr_1
302 88 : CALL precontract_3c_ints(t_3c_int, ri_data, qs_env)
303 :
304 88 : CALL timestop(handle)
305 :
306 2530 : END SUBROUTINE hfx_ri_pre_scf_kp
307 :
308 : ! **************************************************************************************************
309 : !> \brief clean-up the KP specific data from ri_data
310 : !> \param ri_data ...
311 : ! **************************************************************************************************
312 88 : SUBROUTINE cleanup_kp(ri_data)
313 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
314 :
315 : INTEGER :: i, j
316 :
317 88 : IF (ALLOCATED(ri_data%kp_cost)) DEALLOCATE (ri_data%kp_cost)
318 88 : IF (ALLOCATED(ri_data%idx_to_img)) DEALLOCATE (ri_data%idx_to_img)
319 88 : IF (ALLOCATED(ri_data%img_to_idx)) DEALLOCATE (ri_data%img_to_idx)
320 88 : IF (ALLOCATED(ri_data%present_images)) DEALLOCATE (ri_data%present_images)
321 88 : IF (ALLOCATED(ri_data%img_to_RI_cell)) DEALLOCATE (ri_data%img_to_RI_cell)
322 88 : IF (ALLOCATED(ri_data%RI_cell_to_img)) DEALLOCATE (ri_data%RI_cell_to_img)
323 :
324 88 : IF (ALLOCATED(ri_data%kp_mat_2c_pot)) THEN
325 870 : DO j = 1, SIZE(ri_data%kp_mat_2c_pot, 2)
326 1712 : DO i = 1, SIZE(ri_data%kp_mat_2c_pot, 1)
327 1684 : CALL dbcsr_release(ri_data%kp_mat_2c_pot(i, j))
328 : END DO
329 : END DO
330 28 : DEALLOCATE (ri_data%kp_mat_2c_pot)
331 : END IF
332 :
333 88 : IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
334 870 : DO i = 1, SIZE(ri_data%kp_t_3c_int)
335 870 : CALL dbt_destroy(ri_data%kp_t_3c_int(i))
336 : END DO
337 870 : DEALLOCATE (ri_data%kp_t_3c_int)
338 : END IF
339 :
340 88 : IF (ALLOCATED(ri_data%t_2c_inv)) THEN
341 204 : DO j = 1, SIZE(ri_data%t_2c_inv, 2)
342 320 : DO i = 1, SIZE(ri_data%t_2c_inv, 1)
343 232 : CALL dbt_destroy(ri_data%t_2c_inv(i, j))
344 : END DO
345 : END DO
346 204 : DEALLOCATE (ri_data%t_2c_inv)
347 : END IF
348 :
349 88 : IF (ALLOCATED(ri_data%t_2c_int)) THEN
350 204 : DO j = 1, SIZE(ri_data%t_2c_int, 2)
351 320 : DO i = 1, SIZE(ri_data%t_2c_int, 1)
352 232 : CALL dbt_destroy(ri_data%t_2c_int(i, j))
353 : END DO
354 : END DO
355 204 : DEALLOCATE (ri_data%t_2c_int)
356 : END IF
357 :
358 88 : IF (ALLOCATED(ri_data%t_2c_pot)) THEN
359 204 : DO j = 1, SIZE(ri_data%t_2c_pot, 2)
360 320 : DO i = 1, SIZE(ri_data%t_2c_pot, 1)
361 232 : CALL dbt_destroy(ri_data%t_2c_pot(i, j))
362 : END DO
363 : END DO
364 204 : DEALLOCATE (ri_data%t_2c_pot)
365 : END IF
366 :
367 88 : IF (ALLOCATED(ri_data%t_3c_int_ctr_1)) THEN
368 990 : DO j = 1, SIZE(ri_data%t_3c_int_ctr_1, 2)
369 1892 : DO i = 1, SIZE(ri_data%t_3c_int_ctr_1, 1)
370 1804 : CALL dbt_destroy(ri_data%t_3c_int_ctr_1(i, j))
371 : END DO
372 : END DO
373 990 : DEALLOCATE (ri_data%t_3c_int_ctr_1)
374 : END IF
375 :
376 88 : IF (ALLOCATED(ri_data%t_3c_int_ctr_2)) THEN
377 176 : DO j = 1, SIZE(ri_data%t_3c_int_ctr_2, 2)
378 264 : DO i = 1, SIZE(ri_data%t_3c_int_ctr_2, 1)
379 176 : CALL dbt_destroy(ri_data%t_3c_int_ctr_2(i, j))
380 : END DO
381 : END DO
382 176 : DEALLOCATE (ri_data%t_3c_int_ctr_2)
383 : END IF
384 :
385 88 : IF (ALLOCATED(ri_data%rho_ao_t)) THEN
386 990 : DO j = 1, SIZE(ri_data%rho_ao_t, 2)
387 2134 : DO i = 1, SIZE(ri_data%rho_ao_t, 1)
388 2046 : CALL dbt_destroy(ri_data%rho_ao_t(i, j))
389 : END DO
390 : END DO
391 1232 : DEALLOCATE (ri_data%rho_ao_t)
392 : END IF
393 :
394 88 : IF (ALLOCATED(ri_data%ks_t)) THEN
395 990 : DO j = 1, SIZE(ri_data%ks_t, 2)
396 2134 : DO i = 1, SIZE(ri_data%ks_t, 1)
397 2046 : CALL dbt_destroy(ri_data%ks_t(i, j))
398 : END DO
399 : END DO
400 1232 : DEALLOCATE (ri_data%ks_t)
401 : END IF
402 :
403 88 : END SUBROUTINE cleanup_kp
404 :
405 : ! **************************************************************************************************
406 : !> \brief Prints a progress bar for the k-point RI-HFX triple loop
407 : !> \param b_img ...
408 : !> \param nimg ...
409 : !> \param iprint ...
410 : !> \param ri_data ...
411 : ! **************************************************************************************************
412 0 : SUBROUTINE print_progress_bar(b_img, nimg, iprint, ri_data)
413 : INTEGER, INTENT(IN) :: b_img, nimg
414 : INTEGER, INTENT(INOUT) :: iprint
415 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
416 :
417 : CHARACTER(LEN=default_string_length) :: bar
418 : INTEGER :: rep
419 :
420 0 : IF (ri_data%unit_nr > 0) THEN
421 0 : IF (b_img == 1) THEN
422 0 : WRITE (ri_data%unit_nr, '(/T6,A)', advance="no") '[-'
423 0 : CALL m_flush(ri_data%unit_nr)
424 : END IF
425 0 : IF (b_img > iprint*nimg/71) THEN
426 0 : rep = MAX(1, 71/nimg)
427 0 : bar = REPEAT("-", rep)
428 0 : WRITE (ri_data%unit_nr, '(A)', advance="no") TRIM(bar)
429 0 : CALL m_flush(ri_data%unit_nr)
430 0 : iprint = iprint + 1
431 : END IF
432 0 : IF (b_img == nimg) THEN
433 0 : rep = MAX(0, 1 + 71 - iprint*rep)
434 0 : bar = REPEAT("-", rep)
435 0 : WRITE (ri_data%unit_nr, '(A,A)') TRIM(bar), '-]'
436 0 : CALL m_flush(ri_data%unit_nr)
437 : END IF
438 : END IF
439 :
440 0 : END SUBROUTINE print_progress_bar
441 :
442 : ! **************************************************************************************************
443 : !> \brief Update the KS matrices for each real-space image
444 : !> \param qs_env ...
445 : !> \param ri_data ...
446 : !> \param ks_matrix ...
447 : !> \param ehfx ...
448 : !> \param rho_ao ...
449 : !> \param geometry_did_change ...
450 : !> \param nspins ...
451 : !> \param hf_fraction ...
452 : ! **************************************************************************************************
453 274 : SUBROUTINE hfx_ri_update_ks_kp(qs_env, ri_data, ks_matrix, ehfx, rho_ao, &
454 : geometry_did_change, nspins, hf_fraction)
455 :
456 : TYPE(qs_environment_type), POINTER :: qs_env
457 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
458 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
459 : REAL(KIND=dp), INTENT(OUT) :: ehfx
460 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
461 : LOGICAL, INTENT(IN) :: geometry_did_change
462 : INTEGER, INTENT(IN) :: nspins
463 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
464 :
465 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_kp'
466 :
467 : INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_spin, iatom, &
468 : iblk, igroup, iprint, jatom, mb_img, n_batch_nze, n_nze, natom, ngroups, nimg, nimg_nze
469 : INTEGER(int_8) :: mem, nflop, nze
470 274 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_at, batch_ranges_nze, &
471 274 : idx_to_at_AO
472 274 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
473 274 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: sparsity_pattern
474 : LOGICAL :: estimate_mem, print_progress, use_delta_p
475 : REAL(dp) :: etmp, fac, occ, pfac, pref, t1, t2, t3, &
476 : t4
477 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
478 : TYPE(dbcsr_type) :: ks_desymm, rho_desymm, tmp
479 274 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
480 : TYPE(dbcsr_type), POINTER :: dbcsr_template
481 274 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ks_t_split, t_2c_ao_tmp, t_2c_work, &
482 274 : t_3c_int, t_3c_work_2, t_3c_work_3
483 274 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: ks_t, ks_t_sub, t_3c_apc, t_3c_apc_sub
484 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
485 : TYPE(section_vals_type), POINTER :: hfx_section, print_section
486 :
487 274 : NULLIFY (para_env, para_env_sub, blacs_env_sub, hfx_section, dbcsr_template, print_section)
488 :
489 274 : CALL cite_reference(Bussy2024)
490 :
491 274 : CALL timeset(routineN, handle)
492 :
493 274 : CALL get_qs_env(qs_env, para_env=para_env, natom=natom)
494 :
495 274 : IF (nspins == 1) THEN
496 190 : fac = 0.5_dp*hf_fraction
497 : ELSE
498 84 : fac = 1.0_dp*hf_fraction
499 : END IF
500 :
501 274 : hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
502 274 : CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
503 274 : CALL section_vals_val_get(hfx_section, "KP_STACK_SIZE", i_val=batch_size)
504 274 : CALL section_vals_val_get(hfx_section, "KP_USE_DELTA_P", l_val=use_delta_p)
505 274 : ri_data%kp_stack_size = batch_size
506 274 : ri_data%kp_ngroups = ngroups
507 :
508 274 : IF (geometry_did_change) THEN
509 88 : CALL hfx_ri_pre_scf_kp(ks_matrix(1, 1)%matrix, ri_data, qs_env)
510 : END IF
511 274 : nimg = ri_data%nimg
512 274 : nimg_nze = ri_data%nimg_nze
513 :
514 : !We need to calculate the KS matrix for each periodic cell with index b: F_mu^0,nu^b
515 : !F_mu^0,nu^b = -0.5 sum_a,c P_sigma^0,lambda^c (mu^0, sigma^a| P^0) V_P^0,Q^b (Q^b| nu^b lambda^a+c)
516 : !with V_P^0,Q^b = (P^0|R^0)^-1 * (R^0|S^b) * (S^b|Q^b)^-1
517 :
518 : !We use a local RI basis set for each atom in the system, which inlcudes RI basis elements for
519 : !each neighboring atom standing within the KIND radius (decay of Gaussian with smallest exponent)
520 :
521 : !We also limit the number of periodic images we consider accorrding to the HFX potentail in the
522 : !RI basis, because if V_P^0,Q^b is zero everywhere, then image b can be ignored (RI basis less diffuse)
523 :
524 : !We manage to calculate each KS matrix doing a double loop on iamges, and a double loop on atoms
525 : !First, we pre-contract and store P_sigma^0,lambda^c (mu^0, sigma^a| P^0) (P^0|R^0)^-1 into T_mu^0,lambda^a+c,P^0
526 : !Then, we loop over b_img, iatom, jatom to get (R^0|S^b)
527 : !Finally, we do an additional loop over a+c images where we do (R^0|S^b) (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c)
528 : !and the final contraction with T_mu^0,lambda^a+c,P^0
529 :
530 : !Note that the 3-center integrals are pre-contracted with the RI metric, and that the same tensor can be used
531 : !(mu^0, sigma^a| P^0) (P^0|R^0) <===> (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c) by relabelling the images
532 :
533 : !By default, build the density tensor based on the difference of this SCF P and that of the prev. SCF
534 274 : pfac = -1.0_dp
535 274 : IF (.NOT. use_delta_p) pfac = 0.0_dp
536 274 : CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, pfac, ri_data, qs_env)
537 :
538 274 : n_nze = 0
539 8162 : DO i_img = 1, nimg
540 17616 : DO i_spin = 1, nspins
541 9454 : CALL get_tensor_occupancy(ri_data%rho_ao_t(i_spin, i_img), nze, occ)
542 17342 : IF (nze > 0) THEN
543 7382 : n_nze = n_nze + 1
544 : END IF
545 : END DO
546 : END DO
547 274 : IF (n_nze == nspins) THEN
548 36 : CPWARN("It is highly recommended to restart from a converged GGA K-point calculations.")
549 : END IF
550 :
551 20630 : ALLOCATE (ks_t(nspins, nimg))
552 8162 : DO i_img = 1, nimg
553 17616 : DO i_spin = 1, nspins
554 17342 : CALL dbt_create(ri_data%ks_t(1, 1), ks_t(i_spin, i_img))
555 : END DO
556 : END DO
557 :
558 822 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
559 274 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
560 :
561 : !First we calculate and store T^1_mu^0,lambda^a+c,P = P_mu^0,lambda^c * (mu_0 sigma^a | P^0) (P^0|R^0)^-1
562 : !To avoid doing nimg**2 tiny contractions that do not scale well with a large number of CPUs,
563 : !we instead do a single loop over the a+c image index. For each a+c, we get a list of allowed
564 : !combination of a,c indices. Then we build TAS tensors P_mu^0,lambda^c with all concerned c's
565 : !and (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 with all a's. Then we perform a single contraction with larger tensors,
566 : !were the sum over a,c is automatically taken care of
567 20356 : ALLOCATE (t_3c_apc(nspins, nimg))
568 8162 : DO i_img = 1, nimg
569 17616 : DO i_spin = 1, nspins
570 17342 : CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
571 : END DO
572 : END DO
573 274 : CALL contract_pmat_3c(t_3c_apc, ri_data%rho_ao_t, ri_data, qs_env)
574 :
575 274 : IF (MOD(para_env%num_pe, ngroups) /= 0) THEN
576 0 : CPWARN("KP_NGROUPS must be an integer divisor of the total number of MPI ranks. It was set to 1.")
577 0 : ngroups = 1
578 0 : CALL section_vals_val_set(hfx_section, "KP_NGROUPS", i_val=ngroups)
579 : END IF
580 274 : IF ((MOD(ngroups, natom) /= 0) .AND. (MOD(natom, ngroups) /= 0) .AND. geometry_did_change) THEN
581 0 : IF (ngroups > 1) THEN
582 0 : CPWARN("Better load balancing is reached if NGROUPS is a multiple/divisor of the number of atoms")
583 : END IF
584 : END IF
585 274 : group_size = para_env%num_pe/ngroups
586 274 : igroup = para_env%mepos/group_size
587 :
588 274 : ALLOCATE (para_env_sub)
589 274 : CALL para_env_sub%from_split(para_env, igroup)
590 274 : CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
591 :
592 : ! The sparsity pattern of each iatom, jatom pair, on each b_img, and on which subgroup
593 1370 : ALLOCATE (sparsity_pattern(natom, natom, nimg))
594 274 : CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
595 274 : CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
596 :
597 : !Get all the required tensors in the subgroups
598 37012 : ALLOCATE (mat_2c_pot(nimg), ks_t_sub(nspins, nimg), t_2c_ao_tmp(1), ks_t_split(2), t_2c_work(3))
599 : CALL get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
600 274 : group_size, ngroups, para_env, para_env_sub, ri_data)
601 :
602 37286 : ALLOCATE (t_3c_int(nimg), t_3c_apc_sub(nspins, nimg), t_3c_work_2(3), t_3c_work_3(3))
603 : CALL get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
604 274 : group_size, ngroups, para_env, para_env_sub, ri_data)
605 :
606 : !We go atom by atom, therefore there is an automatic batching along that direction
607 : !Also, because we stack the 3c tensors nimg times, we naturally do some batching there too
608 822 : ALLOCATE (batch_ranges_at(natom + 1))
609 274 : batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
610 274 : iatom = 0
611 1240 : DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
612 1240 : IF (idx_to_at_AO(iblk) == iatom + 1) THEN
613 548 : iatom = iatom + 1
614 548 : batch_ranges_at(iatom) = iblk
615 : END IF
616 : END DO
617 :
618 274 : n_batch_nze = nimg_nze/batch_size
619 274 : IF (MODULO(nimg_nze, batch_size) /= 0) n_batch_nze = n_batch_nze + 1
620 822 : ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
621 668 : DO i_batch = 1, n_batch_nze
622 668 : batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
623 : END DO
624 274 : batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
625 :
626 274 : print_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI%PRINT")
627 274 : CALL section_vals_val_get(print_section, "KP_RI_PROGRESS_BAR", l_val=print_progress)
628 274 : CALL section_vals_val_get(print_section, "KP_RI_MEMORY_ESTIMATE", l_val=estimate_mem)
629 :
630 822 : ALLOCATE (iapc_pairs(nimg, 2))
631 274 : IF (estimate_mem .AND. geometry_did_change) THEN
632 : !Populate work tensors to simulate maximum usage
633 0 : CALL get_iapc_pairs(iapc_pairs, 1, ri_data, qs_env)
634 : CALL fill_3c_stack(t_3c_work_3(1), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
635 : filter_at=1, filter_dim=2, idx_to_at=idx_to_at_AO, &
636 0 : img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
637 : CALL fill_3c_stack(t_3c_work_3(2), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
638 : filter_at=1, filter_dim=2, idx_to_at=idx_to_at_AO, &
639 0 : img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
640 : CALL fill_3c_stack(t_3c_work_2(1), t_3c_apc_sub(1, :), iapc_pairs(:, 2), 3, &
641 : ri_data, filter_at=1, filter_dim=1, idx_to_at=idx_to_at_AO, &
642 0 : img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
643 : CALL fill_3c_stack(t_3c_work_2(2), t_3c_apc_sub(1, :), iapc_pairs(:, 2), 3, &
644 : ri_data, filter_at=1, filter_dim=1, idx_to_at=idx_to_at_AO, &
645 0 : img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
646 : CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, 1, 1, 1, ri_data, qs_env, &
647 : blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
648 0 : dbcsr_template=dbcsr_template)
649 0 : CALL m_memory(mem)
650 0 : CALL para_env%max(mem)
651 0 : CALL dbt_clear(t_3c_work_2(1))
652 0 : CALL dbt_clear(t_3c_work_2(2))
653 0 : CALL dbt_clear(t_3c_work_3(1))
654 0 : CALL dbt_clear(t_3c_work_3(2))
655 0 : CALL dbt_clear(t_2c_work(1))
656 :
657 0 : IF (ri_data%unit_nr > 0) THEN
658 : WRITE (ri_data%unit_nr, FMT="(T3,A,I14)") &
659 0 : "KP-HFX_RI_INFO| Estimated peak memory usage per MPI rank (MiB):", mem/(1024*1024)
660 0 : CALL m_flush(ri_data%unit_nr)
661 : END IF
662 : END IF
663 :
664 274 : CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
665 274 : CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
666 274 : CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
667 274 : CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
668 :
669 274 : iprint = 1
670 274 : t1 = m_walltime()
671 55490 : ri_data%kp_cost(:, :, :) = 0.0_dp
672 8162 : DO b_img = 1, nimg
673 7888 : IF (print_progress) CALL print_progress_bar(b_img, nimg, iprint, ri_data)
674 7888 : CALL dbt_batched_contract_init(ks_t_split(1))
675 7888 : CALL dbt_batched_contract_init(ks_t_split(2))
676 23664 : DO jatom = 1, natom
677 55216 : DO iatom = 1, natom
678 31552 : IF (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup) CYCLE
679 5163 : pref = 1.0_dp
680 5163 : IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp
681 :
682 : !measure the cost of the given i, j, b configuration
683 5163 : t3 = m_walltime()
684 :
685 : !Get the proper HFX potential 2c integrals (R_i^0|S_j^b)
686 5163 : CALL timeset(routineN//"_2c", handle2)
687 : CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
688 : blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
689 5163 : dbcsr_template=dbcsr_template)
690 5163 : CALL dbt_copy(t_2c_work(1), t_2c_work(2), move_data=.TRUE.) !move to split blocks
691 5163 : CALL dbt_filter(t_2c_work(2), ri_data%filter_eps)
692 5163 : CALL timestop(handle2)
693 :
694 5163 : CALL dbt_batched_contract_init(t_2c_work(2))
695 5163 : CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env)
696 5163 : CALL timeset(routineN//"_3c", handle2)
697 :
698 : !Stack the (S^b|Q^b)^-1 * (Q^b| nu^b lambda^a+c) integrals over a+c and multiply by (R_i^0|S_j^b)
699 13932 : DO i_batch = 1, n_batch_nze
700 : CALL fill_3c_stack(t_3c_work_3(3), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
701 : filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_AO, &
702 26307 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
703 8769 : CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1), move_data=.TRUE.)
704 :
705 : CALL dbt_contract(1.0_dp, t_2c_work(2), t_3c_work_3(1), &
706 : 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
707 : contract_1=[2], notcontract_1=[1], &
708 : contract_2=[1], notcontract_2=[2, 3], &
709 8769 : filter_eps=ri_data%filter_eps, flop=nflop)
710 8769 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
711 8769 : CALL dbt_copy(t_3c_work_3(2), t_3c_work_2(2), order=[2, 1, 3], move_data=.TRUE.)
712 8769 : CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1))
713 :
714 : !Stack the P_sigma^a,lambda^a+c * (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 integrals over a+c and contract
715 : !to get the final block of the KS matrix
716 24709 : DO i_spin = 1, nspins
717 : CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
718 : ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_AO, &
719 32331 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
720 10777 : CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
721 :
722 10777 : IF (nze == 0) CYCLE
723 10485 : CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.TRUE.)
724 : CALL dbt_contract(-pref*fac, t_3c_work_2(1), t_3c_work_2(2), &
725 : 1.0_dp, ks_t_split(i_spin), map_1=[1], map_2=[2], &
726 : contract_1=[2, 3], notcontract_1=[1], &
727 : contract_2=[2, 3], notcontract_2=[1], &
728 : filter_eps=ri_data%filter_eps, &
729 10485 : move_data=i_spin == nspins, flop=nflop)
730 30031 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
731 : END DO
732 : END DO !i_batch
733 5163 : CALL timestop(handle2)
734 5163 : CALL dbt_batched_contract_finalize(t_2c_work(2))
735 :
736 5163 : t4 = m_walltime()
737 57654 : ri_data%kp_cost(iatom, jatom, b_img) = t4 - t3
738 : END DO !iatom
739 : END DO !jatom
740 7888 : CALL dbt_batched_contract_finalize(ks_t_split(1))
741 7888 : CALL dbt_batched_contract_finalize(ks_t_split(2))
742 :
743 17616 : DO i_spin = 1, nspins
744 9454 : CALL dbt_copy(ks_t_split(i_spin), t_2c_ao_tmp(1), move_data=.TRUE.)
745 17342 : CALL dbt_copy(t_2c_ao_tmp(1), ks_t_sub(i_spin, b_img), summation=.TRUE.)
746 : END DO
747 : END DO !b_img
748 274 : CALL dbt_batched_contract_finalize(t_3c_work_3(1))
749 274 : CALL dbt_batched_contract_finalize(t_3c_work_3(2))
750 274 : CALL dbt_batched_contract_finalize(t_3c_work_2(1))
751 274 : CALL dbt_batched_contract_finalize(t_3c_work_2(2))
752 274 : CALL para_env%sync()
753 274 : CALL para_env%sum(ri_data%dbcsr_nflop)
754 274 : CALL para_env%sum(ri_data%kp_cost)
755 274 : t2 = m_walltime()
756 274 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
757 :
758 : !transfer KS tensor from subgroup to main group
759 274 : CALL gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
760 :
761 : !Keep the 3c integrals on the subgroups to avoid communication at next SCF step
762 8162 : DO i_img = 1, nimg
763 8162 : CALL dbt_copy(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img), move_data=.TRUE.)
764 : END DO
765 :
766 : !clean-up subgroup tensors
767 274 : CALL dbt_destroy(t_2c_ao_tmp(1))
768 274 : CALL dbt_destroy(ks_t_split(1))
769 274 : CALL dbt_destroy(ks_t_split(2))
770 274 : CALL dbt_destroy(t_2c_work(1))
771 274 : CALL dbt_destroy(t_2c_work(2))
772 274 : CALL dbt_destroy(t_3c_work_2(1))
773 274 : CALL dbt_destroy(t_3c_work_2(2))
774 274 : CALL dbt_destroy(t_3c_work_2(3))
775 274 : CALL dbt_destroy(t_3c_work_3(1))
776 274 : CALL dbt_destroy(t_3c_work_3(2))
777 274 : CALL dbt_destroy(t_3c_work_3(3))
778 8162 : DO i_img = 1, nimg
779 7888 : CALL dbt_destroy(t_3c_int(i_img))
780 7888 : CALL dbcsr_release(mat_2c_pot(i_img))
781 17616 : DO i_spin = 1, nspins
782 9454 : CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
783 17342 : CALL dbt_destroy(ks_t_sub(i_spin, i_img))
784 : END DO
785 : END DO
786 274 : IF (ASSOCIATED(dbcsr_template)) THEN
787 274 : CALL dbcsr_release(dbcsr_template)
788 274 : DEALLOCATE (dbcsr_template)
789 : END IF
790 :
791 : !End of subgroup parallelization
792 274 : CALL cp_blacs_env_release(blacs_env_sub)
793 274 : CALL para_env_sub%free()
794 274 : DEALLOCATE (para_env_sub)
795 :
796 : !Currently, rho_ao_t holds the density difference (wrt to pref SCF step).
797 : !ks_t also hold that diff, while only having half the blocks => need to add to prev ks_t and symmetrize
798 : !We need the full thing for the energy, on the next SCF step
799 274 : CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
800 632 : DO i_spin = 1, nspins
801 10086 : DO b_img = 1, nimg
802 9454 : CALL dbt_copy(ks_t(i_spin, b_img), ri_data%ks_t(i_spin, b_img), summation=.TRUE.)
803 :
804 : !desymmetrize
805 9454 : mb_img = get_opp_index(b_img, qs_env)
806 9812 : IF (mb_img > 0 .AND. mb_img <= nimg) THEN
807 8438 : CALL dbt_copy(ks_t(i_spin, mb_img), ri_data%ks_t(i_spin, b_img), order=[2, 1], summation=.TRUE.)
808 : END IF
809 : END DO
810 : END DO
811 8162 : DO b_img = 1, nimg
812 17616 : DO i_spin = 1, nspins
813 17342 : CALL dbt_destroy(ks_t(i_spin, b_img))
814 : END DO
815 : END DO
816 :
817 : !calculate the energy
818 274 : CALL dbt_create(ri_data%ks_t(1, 1), t_2c_ao_tmp(1))
819 274 : CALL dbcsr_create(tmp, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_symmetric)
820 274 : CALL dbcsr_create(ks_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
821 274 : CALL dbcsr_create(rho_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
822 274 : ehfx = 0.0_dp
823 8162 : DO i_img = 1, nimg
824 17616 : DO i_spin = 1, nspins
825 9454 : CALL dbt_filter(ri_data%ks_t(i_spin, i_img), ri_data%filter_eps)
826 9454 : CALL dbt_copy(ri_data%ks_t(i_spin, i_img), t_2c_ao_tmp(1))
827 9454 : CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), ks_desymm)
828 9454 : CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), tmp)
829 9454 : CALL dbcsr_add(ks_matrix(i_spin, i_img)%matrix, tmp, 1.0_dp, 1.0_dp)
830 :
831 9454 : CALL dbt_copy(ri_data%rho_ao_t(i_spin, i_img), t_2c_ao_tmp(1))
832 9454 : CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), rho_desymm)
833 :
834 9454 : CALL dbcsr_dot(ks_desymm, rho_desymm, etmp)
835 9454 : ehfx = ehfx + 0.5_dp*etmp
836 :
837 17342 : IF (.NOT. use_delta_p) CALL dbt_clear(ri_data%ks_t(i_spin, i_img))
838 : END DO
839 : END DO
840 274 : CALL dbcsr_release(rho_desymm)
841 274 : CALL dbcsr_release(ks_desymm)
842 274 : CALL dbcsr_release(tmp)
843 274 : CALL dbt_destroy(t_2c_ao_tmp(1))
844 :
845 274 : CALL timestop(handle)
846 :
847 50362 : END SUBROUTINE hfx_ri_update_ks_kp
848 :
849 : ! **************************************************************************************************
850 : !> \brief Update the K-points RI-HFX forces
851 : !> \param qs_env ...
852 : !> \param ri_data ...
853 : !> \param nspins ...
854 : !> \param hf_fraction ...
855 : !> \param rho_ao ...
856 : !> \param use_virial ...
857 : !> \note Because this routine uses stored quantities calculated in the energy calculation, they should
858 : !> always be called by pairs, and with the same input densities
859 : ! **************************************************************************************************
860 50 : SUBROUTINE hfx_ri_update_forces_kp(qs_env, ri_data, nspins, hf_fraction, rho_ao, use_virial)
861 :
862 : TYPE(qs_environment_type), POINTER :: qs_env
863 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
864 : INTEGER, INTENT(IN) :: nspins
865 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
866 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
867 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial
868 :
869 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_forces_kp'
870 :
871 : INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_loop, i_spin, &
872 : i_xyz, iatom, iblk, igroup, j_xyz, jatom, k_xyz, n_batch, natom, ngroups, nimg, nimg_nze
873 : INTEGER(int_8) :: nflop, nze
874 50 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_ranges_at, &
875 50 : batch_ranges_nze, dist1, dist2, &
876 50 : i_images, idx_to_at_AO, idx_to_at_RI, &
877 50 : kind_of
878 50 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
879 50 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: force_pattern, sparsity_pattern
880 : INTEGER, DIMENSION(2, 1) :: bounds_iat, bounds_jat
881 : LOGICAL :: use_virial_prv
882 : REAL(dp) :: fac, occ, pref, t1, t2
883 : REAL(dp), DIMENSION(3, 3) :: work_virial
884 50 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
885 : TYPE(cell_type), POINTER :: cell
886 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
887 50 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
888 50 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_pot, mat_der_pot_sub
889 : TYPE(dbcsr_type), POINTER :: dbcsr_template
890 850 : TYPE(dbt_type) :: t_2c_R, t_2c_R_split
891 50 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_bint, t_2c_binv, t_2c_der_pot, &
892 100 : t_2c_inv, t_2c_metric, t_2c_work, &
893 50 : t_3c_der_stack, t_3c_work_2, &
894 50 : t_3c_work_3
895 50 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
896 100 : t_2c_der_metric_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_AO, t_3c_der_AO_sub, t_3c_der_RI, &
897 50 : t_3c_der_RI_sub
898 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
899 50 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
900 50 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
901 : TYPE(section_vals_type), POINTER :: hfx_section
902 : TYPE(virial_type), POINTER :: virial
903 :
904 50 : NULLIFY (para_env, para_env_sub, hfx_section, blacs_env_sub, dbcsr_template, force, atomic_kind_set, &
905 50 : virial, particle_set, cell)
906 :
907 50 : CALL timeset(routineN, handle)
908 :
909 50 : use_virial_prv = .FALSE.
910 50 : IF (PRESENT(use_virial)) use_virial_prv = use_virial
911 :
912 50 : IF (nspins == 1) THEN
913 34 : fac = 0.5_dp*hf_fraction
914 : ELSE
915 16 : fac = 1.0_dp*hf_fraction
916 : END IF
917 :
918 : CALL get_qs_env(qs_env, natom=natom, para_env=para_env, force=force, cell=cell, virial=virial, &
919 50 : atomic_kind_set=atomic_kind_set, particle_set=particle_set)
920 50 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
921 :
922 150 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
923 50 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
924 :
925 150 : ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
926 50 : CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
927 :
928 50 : nimg = ri_data%nimg
929 15298 : ALLOCATE (t_3c_der_RI(nimg, 3), t_3c_der_AO(nimg, 3), mat_der_pot(nimg, 3), t_2c_der_metric(natom, 3))
930 :
931 : !We assume that the integrals are available from the SCF
932 : !pre-calculate the derivs. 3c tensors as (P^0| sigma^a mu^0), with t_3c_der_AO holding deriv wrt mu^0
933 50 : CALL precalc_derivatives(t_3c_der_RI, t_3c_der_AO, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
934 :
935 : !Calculate the density matrix at each image
936 3624 : ALLOCATE (rho_ao_t(nspins, nimg))
937 : CALL create_2c_tensor(rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
938 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
939 50 : name="(AO | AO)")
940 50 : DEALLOCATE (dist1, dist2)
941 50 : IF (nspins == 2) CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(2, 1))
942 1422 : DO i_img = 2, nimg
943 2958 : DO i_spin = 1, nspins
944 2908 : CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(i_spin, i_img))
945 : END DO
946 : END DO
947 50 : CALL get_pmat_images(rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
948 :
949 : !Contract integrals with the density matrix
950 3624 : ALLOCATE (t_3c_apc(nspins, nimg))
951 1472 : DO i_img = 1, nimg
952 3074 : DO i_spin = 1, nspins
953 3024 : CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
954 : END DO
955 : END DO
956 50 : CALL contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
957 :
958 : !Setup the subgroups
959 50 : hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
960 50 : CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
961 50 : group_size = para_env%num_pe/ngroups
962 50 : igroup = para_env%mepos/group_size
963 :
964 50 : ALLOCATE (para_env_sub)
965 50 : CALL para_env_sub%from_split(para_env, igroup)
966 50 : CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
967 :
968 : !Get the ususal sparsity pattern
969 250 : ALLOCATE (sparsity_pattern(natom, natom, nimg))
970 50 : CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
971 50 : CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
972 :
973 : !Get the 2-center quantities in the subgroups (note: main group derivs are deleted wihtin)
974 0 : ALLOCATE (t_2c_inv(natom), mat_2c_pot(nimg), rho_ao_t_sub(nspins, nimg), t_2c_work(5), &
975 0 : t_2c_der_metric_sub(natom, 3), mat_der_pot_sub(nimg, 3), t_2c_bint(natom), &
976 13562 : t_2c_metric(natom), t_2c_binv(natom))
977 : CALL get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
978 : rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
979 50 : mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
980 50 : CALL dbt_create(t_2c_work(1), t_2c_R) !nRI x nRI
981 50 : CALL dbt_create(t_2c_work(5), t_2c_R_split) !nRI x nRI with split blocks
982 :
983 600 : ALLOCATE (t_2c_der_pot(3))
984 200 : DO i_xyz = 1, 3
985 200 : CALL dbt_create(t_2c_R, t_2c_der_pot(i_xyz))
986 : END DO
987 :
988 : !Get the 3-center quantities in the subgroups. The integrals and t_3c_apc already there
989 0 : ALLOCATE (t_3c_work_2(3), t_3c_work_3(4), t_3c_der_stack(6), t_3c_der_AO_sub(nimg, 3), &
990 15206 : t_3c_der_RI_sub(nimg, 3), t_3c_apc_sub(nspins, nimg))
991 : CALL get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_AO, t_3c_der_AO_sub, &
992 : t_3c_der_RI, t_3c_der_RI_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_stack, &
993 50 : group_size, ngroups, para_env, para_env_sub, ri_data)
994 :
995 : !Set up batched contraction (go atom by atom)
996 150 : ALLOCATE (batch_ranges_at(natom + 1))
997 50 : batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
998 50 : iatom = 0
999 244 : DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
1000 244 : IF (idx_to_at_AO(iblk) == iatom + 1) THEN
1001 100 : iatom = iatom + 1
1002 100 : batch_ranges_at(iatom) = iblk
1003 : END IF
1004 : END DO
1005 :
1006 50 : CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
1007 50 : CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
1008 50 : CALL dbt_batched_contract_init(t_3c_work_3(3), batch_range_2=batch_ranges_at)
1009 50 : CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
1010 50 : CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
1011 :
1012 : !Preparing for the stacking of 3c tensors
1013 50 : nimg_nze = ri_data%nimg_nze
1014 50 : batch_size = ri_data%kp_stack_size
1015 50 : n_batch = nimg_nze/batch_size
1016 50 : IF (MODULO(nimg_nze, batch_size) /= 0) n_batch = n_batch + 1
1017 150 : ALLOCATE (batch_ranges_nze(n_batch + 1))
1018 128 : DO i_batch = 1, n_batch
1019 128 : batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
1020 : END DO
1021 50 : batch_ranges_nze(n_batch + 1) = nimg_nze + 1
1022 :
1023 : !Applying the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 from left and right
1024 : !And keep the bump on LHS only version as well, with B*M^-1 = (M^-1*B)^T
1025 150 : DO iatom = 1, natom
1026 100 : CALL dbt_create(t_2c_inv(iatom), t_2c_binv(iatom))
1027 100 : CALL dbt_copy(t_2c_inv(iatom), t_2c_binv(iatom))
1028 100 : CALL apply_bump(t_2c_binv(iatom), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.FALSE.)
1029 150 : CALL apply_bump(t_2c_inv(iatom), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.TRUE.)
1030 : END DO
1031 :
1032 50 : t1 = m_walltime()
1033 50 : work_virial = 0.0_dp
1034 250 : ALLOCATE (iapc_pairs(nimg, 2), i_images(nimg))
1035 250 : ALLOCATE (force_pattern(natom, natom, nimg))
1036 10004 : force_pattern(:, :, :) = -1
1037 : !We proceed with 2 loops: one over the sparsity pattern from the SCF, one over the rest
1038 : !We use the SCF cost model for the first loop, while we calculate the cost of the upcoming loop
1039 150 : DO i_loop = 1, 2
1040 2944 : DO b_img = 1, nimg
1041 8632 : DO jatom = 1, natom
1042 19908 : DO iatom = 1, natom
1043 :
1044 11376 : pref = -0.5_dp*fac
1045 11376 : IF (i_loop == 1 .AND. (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup)) CYCLE
1046 6429 : IF (i_loop == 2 .AND. (.NOT. force_pattern(iatom, jatom, b_img) == igroup)) CYCLE
1047 :
1048 : !Get the proper HFX potential 2c integrals (R_i^0|S_j^b), times (S_j^b|Q_j^b)^-1
1049 1414 : CALL timeset(routineN//"_2c_1", handle2)
1050 : CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
1051 : blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
1052 1414 : dbcsr_template=dbcsr_template)
1053 : CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_inv(jatom), &
1054 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1055 : contract_1=[2], notcontract_1=[1], &
1056 : contract_2=[1], notcontract_2=[2], &
1057 1414 : filter_eps=ri_data%filter_eps, flop=nflop)
1058 1414 : CALL dbt_copy(t_2c_work(2), t_2c_work(5), move_data=.TRUE.) !move to split blocks
1059 1414 : CALL dbt_filter(t_2c_work(5), ri_data%filter_eps)
1060 1414 : CALL timestop(handle2)
1061 :
1062 1414 : CALL timeset(routineN//"_3c", handle2)
1063 7056 : bounds_iat(:, 1) = [SUM(ri_data%bsizes_AO(1:iatom - 1)) + 1, SUM(ri_data%bsizes_AO(1:iatom))]
1064 7020 : bounds_jat(:, 1) = [SUM(ri_data%bsizes_AO(1:jatom - 1)) + 1, SUM(ri_data%bsizes_AO(1:jatom))]
1065 1414 : CALL dbt_clear(t_2c_R_split)
1066 :
1067 3089 : DO i_spin = 1, nspins
1068 3089 : CALL dbt_batched_contract_init(rho_ao_t_sub(i_spin, b_img))
1069 : END DO
1070 :
1071 1414 : CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env, i_images) !i = a+c-b
1072 4147 : DO i_batch = 1, n_batch
1073 :
1074 : !Stack the 3c derivatives to take the trace later on
1075 10932 : DO i_xyz = 1, 3
1076 8199 : CALL dbt_clear(t_3c_der_stack(i_xyz))
1077 : CALL fill_3c_stack(t_3c_der_stack(i_xyz), t_3c_der_RI_sub(:, i_xyz), &
1078 : iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
1079 : filter_dim=2, idx_to_at=idx_to_at_AO, &
1080 24597 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1081 :
1082 8199 : CALL dbt_clear(t_3c_der_stack(3 + i_xyz))
1083 : CALL fill_3c_stack(t_3c_der_stack(3 + i_xyz), t_3c_der_AO_sub(:, i_xyz), &
1084 : iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
1085 : filter_dim=2, idx_to_at=idx_to_at_AO, &
1086 27330 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1087 : END DO
1088 :
1089 7241 : DO i_spin = 1, nspins
1090 : !stack the t_3c_apc tensors
1091 3094 : CALL dbt_clear(t_3c_work_2(3))
1092 : CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
1093 : ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_AO, &
1094 9282 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1095 3094 : CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
1096 3094 : IF (nze == 0) CYCLE
1097 3080 : CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.TRUE.)
1098 :
1099 : !Contract with the second density matrix: P_mu^0,nu^b * t_3c_apc,
1100 : !where t_3c_apc = P_sigma^a,lambda^a+c (mu^0 P^0 sigma^a) *(P^0|R^0)^-1 (stacked along a+c)
1101 : CALL dbt_contract(1.0_dp, rho_ao_t_sub(i_spin, b_img), t_3c_work_2(1), &
1102 : 0.0_dp, t_3c_work_2(2), map_1=[1], map_2=[2, 3], &
1103 : contract_1=[1], notcontract_1=[2], &
1104 : contract_2=[1], notcontract_2=[2, 3], &
1105 : bounds_1=bounds_iat, bounds_2=bounds_jat, &
1106 3080 : filter_eps=ri_data%filter_eps, flop=nflop)
1107 3080 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1108 :
1109 3080 : CALL get_tensor_occupancy(t_3c_work_2(2), nze, occ)
1110 3080 : IF (nze == 0) CYCLE
1111 :
1112 : !Contract with V_PQ so that we can take the trace with (Q^b|nu^b lmabda^a+c)^(x)
1113 2654 : CALL dbt_copy(t_3c_work_2(2), t_3c_work_3(1), order=[2, 1, 3], move_data=.TRUE.)
1114 2654 : CALL dbt_batched_contract_init(t_2c_work(5))
1115 : CALL dbt_contract(1.0_dp, t_2c_work(5), t_3c_work_3(1), &
1116 : 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
1117 : contract_1=[1], notcontract_1=[2], &
1118 : contract_2=[1], notcontract_2=[2, 3], &
1119 2654 : filter_eps=ri_data%filter_eps, flop=nflop)
1120 2654 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1121 2654 : CALL dbt_batched_contract_finalize(t_2c_work(5))
1122 :
1123 : !Contract with the 3c derivatives to get the force/virial
1124 2654 : CALL dbt_copy(t_3c_work_3(2), t_3c_work_3(4), move_data=.TRUE.)
1125 2654 : IF (use_virial_prv) THEN
1126 : CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1127 : t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1128 : idx_to_at_RI, idx_to_at_AO, i_images, &
1129 : batch_ranges_nze(i_batch), 2.0_dp*pref, &
1130 460 : ri_data, qs_env, work_virial, cell, particle_set)
1131 : ELSE
1132 : CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1133 : t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1134 : idx_to_at_RI, idx_to_at_AO, i_images, &
1135 : batch_ranges_nze(i_batch), 2.0_dp*pref, &
1136 2194 : ri_data, qs_env)
1137 : END IF
1138 2654 : CALL dbt_clear(t_3c_work_3(4))
1139 :
1140 : !Contract with the 3-center integrals in order to have a matrix R_PQ such that
1141 : !we can take the trace sum_PQ R_PQ (P^0|Q^b)^(x)
1142 2654 : IF (i_loop == 2) CYCLE
1143 :
1144 : !Stack the 3c integrals
1145 : CALL fill_3c_stack(t_3c_work_3(4), ri_data%kp_t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
1146 : filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_AO, &
1147 4179 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1148 1393 : CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(3), move_data=.TRUE.)
1149 :
1150 1393 : CALL dbt_batched_contract_init(t_2c_R_split)
1151 : CALL dbt_contract(1.0_dp, t_3c_work_3(1), t_3c_work_3(3), &
1152 : 1.0_dp, t_2c_R_split, map_1=[1], map_2=[2], &
1153 : contract_1=[2, 3], notcontract_1=[1], &
1154 : contract_2=[2, 3], notcontract_2=[1], &
1155 1393 : filter_eps=ri_data%filter_eps, flop=nflop)
1156 1393 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1157 1393 : CALL dbt_batched_contract_finalize(t_2c_R_split)
1158 10300 : CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(1))
1159 : END DO
1160 : END DO
1161 3089 : DO i_spin = 1, nspins
1162 3089 : CALL dbt_batched_contract_finalize(rho_ao_t_sub(i_spin, b_img))
1163 : END DO
1164 1414 : CALL timestop(handle2)
1165 :
1166 1414 : IF (i_loop == 2) CYCLE
1167 741 : pref = 2.0_dp*pref
1168 741 : IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp*pref
1169 :
1170 741 : CALL timeset(routineN//"_2c_2", handle2)
1171 : !Note that the derivatives are in atomic block format (not split)
1172 741 : CALL dbt_copy(t_2c_R_split, t_2c_R, move_data=.TRUE.)
1173 :
1174 : CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
1175 : blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
1176 741 : dbcsr_template=dbcsr_template)
1177 :
1178 : !We have to calculate: S^-1(iat) * R_PQ * S^-1(jat) to trace with HFX pot der
1179 : ! + R_PQ * S^-1(jat) * pot^T to trace with S^(x) (iat)
1180 : ! + pot^T * S^-1(iat) *R_PQ to trace with S^(x) (jat)
1181 :
1182 : !Because 3c tensors are all precontracted with the inverse RI metric,
1183 : !t_2c_R is currently implicitely multiplied by S^-1(iat) from the left
1184 : !and S^-1(jat) from the right, directly in the proper format for the trace
1185 : !with the HFX potential derivative
1186 :
1187 : !Trace with HFX pot deriv, that we need to build first
1188 2964 : DO i_xyz = 1, 3
1189 : CALL get_ext_2c_int(t_2c_der_pot(i_xyz), mat_der_pot_sub(:, i_xyz), iatom, jatom, &
1190 : b_img, ri_data, qs_env, blacs_env_ext=blacs_env_sub, &
1191 2964 : para_env_ext=para_env_sub, dbcsr_template=dbcsr_template)
1192 : END DO
1193 :
1194 741 : IF (use_virial_prv) THEN
1195 : CALL get_2c_der_force(force, t_2c_R, t_2c_der_pot, atom_of_kind, kind_of, &
1196 125 : b_img, pref, ri_data, qs_env, work_virial, cell, particle_set)
1197 : ELSE
1198 : CALL get_2c_der_force(force, t_2c_R, t_2c_der_pot, atom_of_kind, kind_of, &
1199 616 : b_img, pref, ri_data, qs_env)
1200 : END IF
1201 :
1202 2964 : DO i_xyz = 1, 3
1203 2964 : CALL dbt_clear(t_2c_der_pot(i_xyz))
1204 : END DO
1205 :
1206 : !R_PQ * S^-1(jat) * pot^T (=A)
1207 : CALL dbt_contract(1.0_dp, t_2c_metric(iatom), t_2c_R, & !get rid of implicit S^-1(iat)
1208 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1209 : contract_1=[2], notcontract_1=[1], &
1210 : contract_2=[1], notcontract_2=[2], &
1211 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1212 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1213 : CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_work(1), &
1214 : 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1215 : contract_1=[2], notcontract_1=[1], &
1216 : contract_2=[2], notcontract_2=[1], &
1217 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1218 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1219 :
1220 : !With the RI bump function, things get more complex. M = (S|P)_D + B*(S|P)_OD*B
1221 : !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1222 : CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(iatom), &
1223 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1224 : contract_1=[2], notcontract_1=[1], &
1225 : contract_2=[1], notcontract_2=[2], &
1226 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1227 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1228 :
1229 : CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1230 : 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1231 : contract_1=[1], notcontract_1=[2], &
1232 : contract_2=[1], notcontract_2=[2], &
1233 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1234 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1235 :
1236 741 : CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.TRUE.)
1237 : CALL get_2c_bump_forces(force, t_2c_work(4), iatom, atom_of_kind, kind_of, pref, &
1238 741 : ri_data, qs_env, work_virial)
1239 :
1240 : !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1241 : CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(2), &
1242 : 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1243 : contract_1=[1], notcontract_1=[2], &
1244 : contract_2=[1], notcontract_2=[2], &
1245 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1246 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1247 :
1248 741 : IF (use_virial_prv) THEN
1249 : CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1250 : kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1251 125 : diag=.TRUE., offdiag=.FALSE.)
1252 : ELSE
1253 : CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1254 616 : kind_of, 1, -pref, ri_data, qs_env, diag=.TRUE., offdiag=.FALSE.)
1255 : END IF
1256 :
1257 : !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1258 741 : CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1259 741 : CALL apply_bump(t_2c_work(2), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.TRUE.)
1260 :
1261 741 : IF (use_virial_prv) THEN
1262 : CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1263 : kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1264 125 : diag=.FALSE., offdiag=.TRUE.)
1265 : ELSE
1266 : CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1267 616 : kind_of, 1, -pref, ri_data, qs_env, diag=.FALSE., offdiag=.TRUE.)
1268 : END IF
1269 :
1270 : !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1271 : !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1272 : CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(iatom), &
1273 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1274 : contract_1=[2], notcontract_1=[1], &
1275 : contract_2=[1], notcontract_2=[2], &
1276 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1277 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1278 :
1279 : CALL dbt_contract(1.0_dp, t_2c_bint(iatom), t_2c_work(4), &
1280 : 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1281 : contract_1=[1], notcontract_1=[2], &
1282 : contract_2=[1], notcontract_2=[2], &
1283 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1284 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1285 :
1286 : CALL get_2c_bump_forces(force, t_2c_work(2), iatom, atom_of_kind, kind_of, -pref, &
1287 741 : ri_data, qs_env, work_virial)
1288 :
1289 : ! pot^T * S^-1(iat) * R_PQ (=A)
1290 : CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_R, &
1291 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1292 : contract_1=[1], notcontract_1=[2], &
1293 : contract_2=[1], notcontract_2=[2], &
1294 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1295 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1296 :
1297 : CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_metric(jatom), & !get rid of implicit S^-1(jat)
1298 : 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1299 : contract_1=[2], notcontract_1=[1], &
1300 : contract_2=[1], notcontract_2=[2], &
1301 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1302 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1303 :
1304 : !Do the same shenanigans with the S^(x) (jatom)
1305 : !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1306 : CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(jatom), &
1307 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1308 : contract_1=[2], notcontract_1=[1], &
1309 : contract_2=[1], notcontract_2=[2], &
1310 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1311 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1312 :
1313 : CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1314 : 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1315 : contract_1=[1], notcontract_1=[2], &
1316 : contract_2=[1], notcontract_2=[2], &
1317 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1318 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1319 :
1320 741 : CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.TRUE.)
1321 : CALL get_2c_bump_forces(force, t_2c_work(4), jatom, atom_of_kind, kind_of, pref, &
1322 741 : ri_data, qs_env, work_virial)
1323 :
1324 : !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1325 : CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(2), &
1326 : 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1327 : contract_1=[1], notcontract_1=[2], &
1328 : contract_2=[1], notcontract_2=[2], &
1329 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1330 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1331 :
1332 741 : IF (use_virial_prv) THEN
1333 : CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1334 : kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1335 125 : diag=.TRUE., offdiag=.FALSE.)
1336 : ELSE
1337 : CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1338 616 : kind_of, 1, -pref, ri_data, qs_env, diag=.TRUE., offdiag=.FALSE.)
1339 : END IF
1340 :
1341 : !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1342 741 : CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1343 741 : CALL apply_bump(t_2c_work(2), jatom, ri_data, qs_env, from_left=.TRUE., from_right=.TRUE.)
1344 :
1345 741 : IF (use_virial_prv) THEN
1346 : CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1347 : kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1348 125 : diag=.FALSE., offdiag=.TRUE.)
1349 : ELSE
1350 : CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1351 616 : kind_of, 1, -pref, ri_data, qs_env, diag=.FALSE., offdiag=.TRUE.)
1352 : END IF
1353 :
1354 : !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1355 : !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1356 : CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(jatom), &
1357 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1358 : contract_1=[2], notcontract_1=[1], &
1359 : contract_2=[1], notcontract_2=[2], &
1360 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1361 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1362 :
1363 : CALL dbt_contract(1.0_dp, t_2c_bint(jatom), t_2c_work(4), &
1364 : 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1365 : contract_1=[1], notcontract_1=[2], &
1366 : contract_2=[1], notcontract_2=[2], &
1367 741 : filter_eps=ri_data%filter_eps, flop=nflop)
1368 741 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1369 :
1370 : CALL get_2c_bump_forces(force, t_2c_work(2), jatom, atom_of_kind, kind_of, -pref, &
1371 741 : ri_data, qs_env, work_virial)
1372 :
1373 19960 : CALL timestop(handle2)
1374 : END DO !iatom
1375 : END DO !jatom
1376 : END DO !b_img
1377 :
1378 150 : IF (i_loop == 1) THEN
1379 50 : CALL update_pattern_to_forces(force_pattern, sparsity_pattern, ngroups, ri_data, qs_env)
1380 : END IF
1381 : END DO !i_loop
1382 :
1383 50 : CALL dbt_batched_contract_finalize(t_3c_work_3(1))
1384 50 : CALL dbt_batched_contract_finalize(t_3c_work_3(2))
1385 50 : CALL dbt_batched_contract_finalize(t_3c_work_3(3))
1386 50 : CALL dbt_batched_contract_finalize(t_3c_work_2(1))
1387 50 : CALL dbt_batched_contract_finalize(t_3c_work_2(2))
1388 :
1389 50 : IF (use_virial_prv) THEN
1390 40 : DO k_xyz = 1, 3
1391 130 : DO j_xyz = 1, 3
1392 390 : DO i_xyz = 1, 3
1393 : virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
1394 360 : + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
1395 : END DO
1396 : END DO
1397 : END DO
1398 : END IF
1399 :
1400 : !End of subgroup parallelization
1401 50 : CALL cp_blacs_env_release(blacs_env_sub)
1402 50 : CALL para_env_sub%free()
1403 50 : DEALLOCATE (para_env_sub)
1404 :
1405 50 : CALL para_env%sync()
1406 50 : t2 = m_walltime()
1407 50 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1408 :
1409 : !clean-up
1410 50 : IF (ASSOCIATED(dbcsr_template)) THEN
1411 50 : CALL dbcsr_release(dbcsr_template)
1412 50 : DEALLOCATE (dbcsr_template)
1413 : END IF
1414 50 : CALL dbt_destroy(t_2c_R)
1415 50 : CALL dbt_destroy(t_2c_R_split)
1416 50 : CALL dbt_destroy(t_2c_work(1))
1417 50 : CALL dbt_destroy(t_2c_work(2))
1418 50 : CALL dbt_destroy(t_2c_work(3))
1419 50 : CALL dbt_destroy(t_2c_work(4))
1420 50 : CALL dbt_destroy(t_2c_work(5))
1421 50 : CALL dbt_destroy(t_3c_work_2(1))
1422 50 : CALL dbt_destroy(t_3c_work_2(2))
1423 50 : CALL dbt_destroy(t_3c_work_2(3))
1424 50 : CALL dbt_destroy(t_3c_work_3(1))
1425 50 : CALL dbt_destroy(t_3c_work_3(2))
1426 50 : CALL dbt_destroy(t_3c_work_3(3))
1427 50 : CALL dbt_destroy(t_3c_work_3(4))
1428 50 : CALL dbt_destroy(t_3c_der_stack(1))
1429 50 : CALL dbt_destroy(t_3c_der_stack(2))
1430 50 : CALL dbt_destroy(t_3c_der_stack(3))
1431 50 : CALL dbt_destroy(t_3c_der_stack(4))
1432 50 : CALL dbt_destroy(t_3c_der_stack(5))
1433 50 : CALL dbt_destroy(t_3c_der_stack(6))
1434 200 : DO i_xyz = 1, 3
1435 200 : CALL dbt_destroy(t_2c_der_pot(i_xyz))
1436 : END DO
1437 150 : DO iatom = 1, natom
1438 100 : CALL dbt_destroy(t_2c_inv(iatom))
1439 100 : CALL dbt_destroy(t_2c_binv(iatom))
1440 100 : CALL dbt_destroy(t_2c_bint(iatom))
1441 100 : CALL dbt_destroy(t_2c_metric(iatom))
1442 450 : DO i_xyz = 1, 3
1443 400 : CALL dbt_destroy(t_2c_der_metric_sub(iatom, i_xyz))
1444 : END DO
1445 : END DO
1446 1472 : DO i_img = 1, nimg
1447 1422 : CALL dbcsr_release(mat_2c_pot(i_img))
1448 3074 : DO i_spin = 1, nspins
1449 1602 : CALL dbt_destroy(rho_ao_t_sub(i_spin, i_img))
1450 3024 : CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
1451 : END DO
1452 : END DO
1453 200 : DO i_xyz = 1, 3
1454 4466 : DO i_img = 1, nimg
1455 4266 : CALL dbt_destroy(t_3c_der_RI_sub(i_img, i_xyz))
1456 4266 : CALL dbt_destroy(t_3c_der_AO_sub(i_img, i_xyz))
1457 4416 : CALL dbcsr_release(mat_der_pot_sub(i_img, i_xyz))
1458 : END DO
1459 : END DO
1460 :
1461 50 : CALL timestop(handle)
1462 :
1463 25772 : END SUBROUTINE hfx_ri_update_forces_kp
1464 :
1465 : ! **************************************************************************************************
1466 : !> \brief A routine the applies the RI bump matrix from the left and/or the right, given an input
1467 : !> matrix and the central RI atom. We assume atomic block sizes
1468 : !> \param t_2c_inout ...
1469 : !> \param atom_i ...
1470 : !> \param ri_data ...
1471 : !> \param qs_env ...
1472 : !> \param from_left ...
1473 : !> \param from_right ...
1474 : !> \param debump ...
1475 : ! **************************************************************************************************
1476 2210 : SUBROUTINE apply_bump(t_2c_inout, atom_i, ri_data, qs_env, from_left, from_right, debump)
1477 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_inout
1478 : INTEGER, INTENT(IN) :: atom_i
1479 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1480 : TYPE(qs_environment_type), POINTER :: qs_env
1481 : LOGICAL, INTENT(IN), OPTIONAL :: from_left, from_right, debump
1482 :
1483 : INTEGER :: i_img, i_RI, iatom, ind(2), j_img, j_RI, &
1484 : jatom, natom, nblks(2), nimg, nkind
1485 2210 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1486 2210 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1487 : LOGICAL :: found, my_debump, my_left, my_right
1488 : REAL(dp) :: bval, r0, r1, ri(3), rj(3), rref(3), &
1489 : scoord(3)
1490 2210 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1491 : TYPE(cell_type), POINTER :: cell
1492 : TYPE(dbt_iterator_type) :: iter
1493 : TYPE(kpoint_type), POINTER :: kpoints
1494 2210 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1495 2210 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1496 :
1497 2210 : NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1498 :
1499 : CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1500 2210 : kpoints=kpoints, particle_set=particle_set)
1501 2210 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1502 :
1503 2210 : my_debump = .FALSE.
1504 2210 : IF (PRESENT(debump)) my_debump = debump
1505 :
1506 2210 : my_left = .FALSE.
1507 2210 : IF (PRESENT(from_left)) my_left = from_left
1508 :
1509 2210 : my_right = .FALSE.
1510 2210 : IF (PRESENT(from_right)) my_right = from_right
1511 2210 : CPASSERT(my_left .OR. my_right)
1512 :
1513 2210 : CALL dbt_get_info(t_2c_inout, nblks_total=nblks)
1514 2210 : CPASSERT(nblks(1) == ri_data%ncell_RI*natom)
1515 2210 : CPASSERT(nblks(2) == ri_data%ncell_RI*natom)
1516 :
1517 2210 : nimg = ri_data%nimg
1518 :
1519 : !Loop over the RI cells and atoms, and apply bump accordingly
1520 2210 : r1 = ri_data%kp_RI_range
1521 2210 : r0 = ri_data%kp_bump_rad
1522 2210 : rref = pbc(particle_set(atom_i)%r, cell)
1523 :
1524 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_inout,natom,ri_data,cell,particle_set,index_to_cell,my_left, &
1525 : !$OMP my_right,r0,r1,rref,my_debump) &
1526 2210 : !$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,bval)
1527 : CALL dbt_iterator_start(iter, t_2c_inout)
1528 : DO WHILE (dbt_iterator_blocks_left(iter))
1529 : CALL dbt_iterator_next_block(iter, ind)
1530 : CALL dbt_get_block(t_2c_inout, ind, blk, found)
1531 : IF (.NOT. found) CYCLE
1532 :
1533 : i_RI = (ind(1) - 1)/natom + 1
1534 : i_img = ri_data%RI_cell_to_img(i_RI)
1535 : iatom = ind(1) - (i_RI - 1)*natom
1536 :
1537 : CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
1538 : CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
1539 :
1540 : j_RI = (ind(2) - 1)/natom + 1
1541 : j_img = ri_data%RI_cell_to_img(j_RI)
1542 : jatom = ind(2) - (j_RI - 1)*natom
1543 :
1544 : CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1545 : CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1546 :
1547 : IF (.NOT. my_debump) THEN
1548 : IF (my_left) blk(:, :) = blk(:, :)*bump(NORM2(ri - rref), r0, r1)
1549 : IF (my_right) blk(:, :) = blk(:, :)*bump(NORM2(rj - rref), r0, r1)
1550 : ELSE
1551 : !Note: by construction, the bump function is never quite zero, as its range is the same
1552 : ! as that of the extended RI basis (but we are safe)
1553 : bval = bump(NORM2(ri - rref), r0, r1)
1554 : IF (my_left .AND. bval > EPSILON(1.0_dp)) blk(:, :) = blk(:, :)/bval
1555 : bval = bump(NORM2(rj - rref), r0, r1)
1556 : IF (my_right .AND. bval > EPSILON(1.0_dp)) blk(:, :) = blk(:, :)/bval
1557 : END IF
1558 :
1559 : CALL dbt_put_block(t_2c_inout, ind, SHAPE(blk), blk)
1560 :
1561 : DEALLOCATE (blk)
1562 : END DO
1563 : CALL dbt_iterator_stop(iter)
1564 : !$OMP END PARALLEL
1565 2210 : CALL dbt_filter(t_2c_inout, ri_data%filter_eps)
1566 :
1567 4420 : END SUBROUTINE apply_bump
1568 :
1569 : ! **************************************************************************************************
1570 : !> \brief A routine that calculates the forces due to the derivative of the bump function
1571 : !> \param force ...
1572 : !> \param t_2c_in ...
1573 : !> \param atom_i ...
1574 : !> \param atom_of_kind ...
1575 : !> \param kind_of ...
1576 : !> \param pref ...
1577 : !> \param ri_data ...
1578 : !> \param qs_env ...
1579 : !> \param work_virial ...
1580 : ! **************************************************************************************************
1581 2964 : SUBROUTINE get_2c_bump_forces(force, t_2c_in, atom_i, atom_of_kind, kind_of, pref, ri_data, &
1582 : qs_env, work_virial)
1583 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1584 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_in
1585 : INTEGER, INTENT(IN) :: atom_i
1586 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
1587 : REAL(dp), INTENT(IN) :: pref
1588 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1589 : TYPE(qs_environment_type), POINTER :: qs_env
1590 : REAL(dp), DIMENSION(3, 3), INTENT(INOUT) :: work_virial
1591 :
1592 : INTEGER :: i, i_img, i_RI, i_xyz, iat_of_kind, iatom, ikind, ind(2), j_img, j_RI, j_xyz, &
1593 : jat_of_kind, jatom, jkind, natom, nblks(2), nimg, nkind
1594 2964 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1595 2964 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1596 : LOGICAL :: found
1597 : REAL(dp) :: new_force, r0, r1, ri(3), rj(3), &
1598 : rref(3), scoord(3), x
1599 2964 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1600 : TYPE(cell_type), POINTER :: cell
1601 : TYPE(dbt_iterator_type) :: iter
1602 : TYPE(kpoint_type), POINTER :: kpoints
1603 2964 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1604 2964 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1605 :
1606 2964 : NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1607 :
1608 : CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1609 2964 : kpoints=kpoints, particle_set=particle_set)
1610 2964 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1611 :
1612 2964 : CALL dbt_get_info(t_2c_in, nblks_total=nblks)
1613 2964 : CPASSERT(nblks(1) == ri_data%ncell_RI*natom)
1614 2964 : CPASSERT(nblks(2) == ri_data%ncell_RI*natom)
1615 :
1616 2964 : nimg = ri_data%nimg
1617 :
1618 : !Loop over the RI cells and atoms, and apply bump accordingly
1619 2964 : r1 = ri_data%kp_RI_range
1620 2964 : r0 = ri_data%kp_bump_rad
1621 2964 : rref = pbc(particle_set(atom_i)%r, cell)
1622 :
1623 2964 : iat_of_kind = atom_of_kind(atom_i)
1624 2964 : ikind = kind_of(atom_i)
1625 :
1626 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_in,natom,ri_data,cell,particle_set,index_to_cell,pref, &
1627 : !$OMP force,r0,r1,rref,atom_of_kind,kind_of,iat_of_kind,ikind,work_virial) &
1628 : !$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,jkind,jat_of_kind, &
1629 2964 : !$OMP new_force,i_xyz,i,x,j_xyz)
1630 : CALL dbt_iterator_start(iter, t_2c_in)
1631 : DO WHILE (dbt_iterator_blocks_left(iter))
1632 : CALL dbt_iterator_next_block(iter, ind)
1633 : IF (ind(1) /= ind(2)) CYCLE !bump matrix is diagonal
1634 :
1635 : CALL dbt_get_block(t_2c_in, ind, blk, found)
1636 : IF (.NOT. found) CYCLE
1637 :
1638 : !bump is a function of x = SQRT((R - Rref)^2). We refer to R as jatom, and Rref as atom_i
1639 : j_RI = (ind(2) - 1)/natom + 1
1640 : j_img = ri_data%RI_cell_to_img(j_RI)
1641 : jatom = ind(2) - (j_RI - 1)*natom
1642 : jat_of_kind = atom_of_kind(jatom)
1643 : jkind = kind_of(jatom)
1644 :
1645 : CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1646 : CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1647 : x = NORM2(rj - rref)
1648 : IF (x < r0 .OR. x > r1) CYCLE
1649 :
1650 : new_force = 0.0_dp
1651 : DO i = 1, SIZE(blk, 1)
1652 : new_force = new_force + blk(i, i)
1653 : END DO
1654 : new_force = pref*new_force*dbump(x, r0, r1)
1655 :
1656 : !x = SQRT((R - Rref)^2), so we multiply by dx/dR and dx/dRref
1657 : DO i_xyz = 1, 3
1658 : !Force acting on second atom
1659 : !$OMP ATOMIC
1660 : force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) + &
1661 : new_force*(rj(i_xyz) - rref(i_xyz))/x
1662 :
1663 : !virial acting on second atom
1664 : CALL real_to_scaled(scoord, rj, cell)
1665 : DO j_xyz = 1, 3
1666 : !$OMP ATOMIC
1667 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1668 : + new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1669 : END DO
1670 :
1671 : !Force acting on reference atom, defining the RI basis
1672 : !$OMP ATOMIC
1673 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) - &
1674 : new_force*(rj(i_xyz) - rref(i_xyz))/x
1675 :
1676 : !virial of ref atom
1677 : CALL real_to_scaled(scoord, rref, cell)
1678 : DO j_xyz = 1, 3
1679 : !$OMP ATOMIC
1680 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1681 : - new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1682 : END DO
1683 : END DO !i_xyz
1684 :
1685 : DEALLOCATE (blk)
1686 : END DO
1687 : CALL dbt_iterator_stop(iter)
1688 : !$OMP END PARALLEL
1689 :
1690 5928 : END SUBROUTINE get_2c_bump_forces
1691 :
1692 : ! **************************************************************************************************
1693 : !> \brief The bumb function as defined by Juerg
1694 : !> \param x ...
1695 : !> \param r0 ...
1696 : !> \param r1 ...
1697 : !> \return ...
1698 : ! **************************************************************************************************
1699 31329 : FUNCTION bump(x, r0, r1) RESULT(b)
1700 : REAL(dp), INTENT(IN) :: x, r0, r1
1701 : REAL(dp) :: b
1702 :
1703 : REAL(dp) :: r
1704 :
1705 : !Head-Gordon
1706 : !b = 1.0_dp/(1.0_dp+EXP((r1-r0)/(r1-x)-(r1-r0)/(x-r0)))
1707 : !Juerg
1708 31329 : r = (x - r0)/(r1 - r0)
1709 31329 : b = -6.0_dp*r**5 + 15.0_dp*r**4 - 10.0_dp*r**3 + 1.0_dp
1710 31329 : IF (x >= r1) b = 0.0_dp
1711 31329 : IF (x <= r0) b = 1.0_dp
1712 :
1713 31329 : END FUNCTION bump
1714 :
1715 : ! **************************************************************************************************
1716 : !> \brief The derivative of the bump function
1717 : !> \param x ...
1718 : !> \param r0 ...
1719 : !> \param r1 ...
1720 : !> \return ...
1721 : ! **************************************************************************************************
1722 610 : FUNCTION dbump(x, r0, r1) RESULT(b)
1723 : REAL(dp), INTENT(IN) :: x, r0, r1
1724 : REAL(dp) :: b
1725 :
1726 : REAL(dp) :: r
1727 :
1728 610 : r = (x - r0)/(r1 - r0)
1729 610 : b = (-30.0_dp*r**4 + 60.0_dp*r**3 - 30.0_dp*r**2)/(r1 - r0)
1730 610 : IF (x >= r1) b = 0.0_dp
1731 610 : IF (x <= r0) b = 0.0_dp
1732 :
1733 610 : END FUNCTION dbump
1734 :
1735 : ! **************************************************************************************************
1736 : !> \brief return the cell index a+c corresponding to given cell index i and b, with i = a+c-b
1737 : !> \param i_index ...
1738 : !> \param b_index ...
1739 : !> \param qs_env ...
1740 : !> \return ...
1741 : ! **************************************************************************************************
1742 652858 : FUNCTION get_apc_index_from_ib(i_index, b_index, qs_env) RESULT(apc_index)
1743 : INTEGER, INTENT(IN) :: i_index, b_index
1744 : TYPE(qs_environment_type), POINTER :: qs_env
1745 : INTEGER :: apc_index
1746 :
1747 : INTEGER, DIMENSION(3) :: cell_apc
1748 652858 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1749 652858 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1750 : TYPE(kpoint_type), POINTER :: kpoints
1751 :
1752 652858 : CALL get_qs_env(qs_env, kpoints=kpoints)
1753 652858 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1754 :
1755 : !i = a+c-b => a+c = i+b
1756 2611432 : cell_apc(:) = index_to_cell(:, i_index) + index_to_cell(:, b_index)
1757 :
1758 4471209 : IF (ANY([cell_apc(1), cell_apc(2), cell_apc(3)] < LBOUND(cell_to_index)) .OR. &
1759 : ANY([cell_apc(1), cell_apc(2), cell_apc(3)] > UBOUND(cell_to_index))) THEN
1760 :
1761 : apc_index = 0
1762 : ELSE
1763 574816 : apc_index = cell_to_index(cell_apc(1), cell_apc(2), cell_apc(3))
1764 : END IF
1765 :
1766 652858 : END FUNCTION get_apc_index_from_ib
1767 :
1768 : ! **************************************************************************************************
1769 : !> \brief return the cell index i corresponding to the summ of cell_a and cell_c
1770 : !> \param a_index ...
1771 : !> \param c_index ...
1772 : !> \param qs_env ...
1773 : !> \return ...
1774 : ! **************************************************************************************************
1775 0 : FUNCTION get_apc_index(a_index, c_index, qs_env) RESULT(i_index)
1776 : INTEGER, INTENT(IN) :: a_index, c_index
1777 : TYPE(qs_environment_type), POINTER :: qs_env
1778 : INTEGER :: i_index
1779 :
1780 : INTEGER, DIMENSION(3) :: cell_i
1781 0 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1782 0 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1783 : TYPE(kpoint_type), POINTER :: kpoints
1784 :
1785 0 : CALL get_qs_env(qs_env, kpoints=kpoints)
1786 0 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1787 :
1788 0 : cell_i(:) = index_to_cell(:, a_index) + index_to_cell(:, c_index)
1789 :
1790 0 : IF (ANY([cell_i(1), cell_i(2), cell_i(3)] < LBOUND(cell_to_index)) .OR. &
1791 : ANY([cell_i(1), cell_i(2), cell_i(3)] > UBOUND(cell_to_index))) THEN
1792 :
1793 : i_index = 0
1794 : ELSE
1795 0 : i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1796 : END IF
1797 :
1798 0 : END FUNCTION get_apc_index
1799 :
1800 : ! **************************************************************************************************
1801 : !> \brief return the cell index i corresponding to the summ of cell_a + cell_c - cell_b
1802 : !> \param apc_index ...
1803 : !> \param b_index ...
1804 : !> \param qs_env ...
1805 : !> \return ...
1806 : ! **************************************************************************************************
1807 907344 : FUNCTION get_i_index(apc_index, b_index, qs_env) RESULT(i_index)
1808 : INTEGER, INTENT(IN) :: apc_index, b_index
1809 : TYPE(qs_environment_type), POINTER :: qs_env
1810 : INTEGER :: i_index
1811 :
1812 : INTEGER, DIMENSION(3) :: cell_i
1813 907344 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1814 907344 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1815 : TYPE(kpoint_type), POINTER :: kpoints
1816 :
1817 907344 : CALL get_qs_env(qs_env, kpoints=kpoints)
1818 907344 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1819 :
1820 3629376 : cell_i(:) = index_to_cell(:, apc_index) - index_to_cell(:, b_index)
1821 :
1822 6219188 : IF (ANY([cell_i(1), cell_i(2), cell_i(3)] < LBOUND(cell_to_index)) .OR. &
1823 : ANY([cell_i(1), cell_i(2), cell_i(3)] > UBOUND(cell_to_index))) THEN
1824 :
1825 : i_index = 0
1826 : ELSE
1827 796068 : i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1828 : END IF
1829 :
1830 907344 : END FUNCTION get_i_index
1831 :
1832 : ! **************************************************************************************************
1833 : !> \brief A routine that returns all allowed a,c pairs such that a+c images corresponds to the value
1834 : !> of the apc_index input. Takes into account that image a corresponds to 3c integrals, which
1835 : !> are ordered in their own way
1836 : !> \param ac_pairs ...
1837 : !> \param apc_index ...
1838 : !> \param ri_data ...
1839 : !> \param qs_env ...
1840 : ! **************************************************************************************************
1841 19680 : SUBROUTINE get_ac_pairs(ac_pairs, apc_index, ri_data, qs_env)
1842 : INTEGER, DIMENSION(:, :), INTENT(INOUT) :: ac_pairs
1843 : INTEGER, INTENT(IN) :: apc_index
1844 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1845 : TYPE(qs_environment_type), POINTER :: qs_env
1846 :
1847 : INTEGER :: a_index, actual_img, c_index, nimg
1848 :
1849 19680 : nimg = SIZE(ac_pairs, 1)
1850 :
1851 1873728 : ac_pairs(:, :) = 0
1852 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(ac_pairs,nimg,ri_data,qs_env,apc_index) &
1853 19680 : !$OMP PRIVATE(a_index,actual_img,c_index)
1854 : DO a_index = 1, nimg
1855 : actual_img = ri_data%idx_to_img(a_index)
1856 : !c = a+c - a
1857 : c_index = get_i_index(apc_index, actual_img, qs_env)
1858 : ac_pairs(a_index, 1) = a_index
1859 : ac_pairs(a_index, 2) = c_index
1860 : END DO
1861 : !$OMP END PARALLEL DO
1862 :
1863 19680 : END SUBROUTINE get_ac_pairs
1864 :
1865 : ! **************************************************************************************************
1866 : !> \brief A routine that returns all allowed i,a+c pairs such that, for the given value of b, we have
1867 : !> i = a+c-b. Takes into account that image i corrsponds to the 3c ints, which are ordered in
1868 : !> their own way
1869 : !> \param iapc_pairs ...
1870 : !> \param b_index ...
1871 : !> \param ri_data ...
1872 : !> \param qs_env ...
1873 : !> \param actual_i_img ...
1874 : ! **************************************************************************************************
1875 15887 : SUBROUTINE get_iapc_pairs(iapc_pairs, b_index, ri_data, qs_env, actual_i_img)
1876 : INTEGER, DIMENSION(:, :), INTENT(INOUT) :: iapc_pairs
1877 : INTEGER, INTENT(IN) :: b_index
1878 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1879 : TYPE(qs_environment_type), POINTER :: qs_env
1880 : INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: actual_i_img
1881 :
1882 : INTEGER :: actual_img, apc_index, i_index, nimg
1883 :
1884 15887 : nimg = SIZE(iapc_pairs, 1)
1885 74475 : IF (PRESENT(actual_i_img)) actual_i_img(:) = 0
1886 :
1887 1353377 : iapc_pairs(:, :) = 0
1888 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(iapc_pairs,nimg,ri_data,qs_env,b_index,actual_i_img) &
1889 15887 : !$OMP PRIVATE(i_index,actual_img,apc_index)
1890 : DO i_index = 1, nimg
1891 : actual_img = ri_data%idx_to_img(i_index)
1892 : apc_index = get_apc_index_from_ib(actual_img, b_index, qs_env)
1893 : IF (apc_index == 0) CYCLE
1894 : iapc_pairs(i_index, 1) = i_index
1895 : iapc_pairs(i_index, 2) = apc_index
1896 : IF (PRESENT(actual_i_img)) actual_i_img(i_index) = actual_img
1897 : END DO
1898 :
1899 15887 : END SUBROUTINE get_iapc_pairs
1900 :
1901 : ! **************************************************************************************************
1902 : !> \brief A function that, given a cell index a, returun the index corresponding to -a, and zero if
1903 : !> if out of bounds
1904 : !> \param a_index ...
1905 : !> \param qs_env ...
1906 : !> \return ...
1907 : ! **************************************************************************************************
1908 95568 : FUNCTION get_opp_index(a_index, qs_env) RESULT(opp_index)
1909 : INTEGER, INTENT(IN) :: a_index
1910 : TYPE(qs_environment_type), POINTER :: qs_env
1911 : INTEGER :: opp_index
1912 :
1913 : INTEGER, DIMENSION(3) :: opp_cell
1914 95568 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1915 95568 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1916 : TYPE(kpoint_type), POINTER :: kpoints
1917 :
1918 95568 : NULLIFY (kpoints, cell_to_index, index_to_cell)
1919 :
1920 95568 : CALL get_qs_env(qs_env, kpoints=kpoints)
1921 95568 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1922 :
1923 382272 : opp_cell(:) = -index_to_cell(:, a_index)
1924 :
1925 668976 : IF (ANY([opp_cell(1), opp_cell(2), opp_cell(3)] < LBOUND(cell_to_index)) .OR. &
1926 : ANY([opp_cell(1), opp_cell(2), opp_cell(3)] > UBOUND(cell_to_index))) THEN
1927 :
1928 : opp_index = 0
1929 : ELSE
1930 95568 : opp_index = cell_to_index(opp_cell(1), opp_cell(2), opp_cell(3))
1931 : END IF
1932 :
1933 95568 : END FUNCTION get_opp_index
1934 :
1935 : ! **************************************************************************************************
1936 : !> \brief A routine that returns the actual non-symemtric density matrix for each image, by Fourier
1937 : !> transforming the kpoint density matrix
1938 : !> \param rho_ao_t ...
1939 : !> \param rho_ao ...
1940 : !> \param scale_prev_p ...
1941 : !> \param ri_data ...
1942 : !> \param qs_env ...
1943 : ! **************************************************************************************************
1944 598 : SUBROUTINE get_pmat_images(rho_ao_t, rho_ao, scale_prev_p, ri_data, qs_env)
1945 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t
1946 : TYPE(dbcsr_p_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao
1947 : REAL(dp), INTENT(IN) :: scale_prev_p
1948 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1949 : TYPE(qs_environment_type), POINTER :: qs_env
1950 :
1951 : INTEGER :: cell_j(3), i_img, i_spin, iatom, icol, &
1952 : irow, j_img, jatom, mi_img, mj_img, &
1953 : nimg, nspins
1954 598 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1955 : LOGICAL :: found
1956 : REAL(dp) :: fac
1957 598 : REAL(dp), DIMENSION(:, :), POINTER :: pblock, pblock_desymm
1958 598 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: matrix_ks, rho_desymm
1959 5382 : TYPE(dbt_type) :: tmp
1960 : TYPE(dft_control_type), POINTER :: dft_control
1961 : TYPE(kpoint_type), POINTER :: kpoints
1962 : TYPE(neighbor_list_iterator_p_type), &
1963 598 : DIMENSION(:), POINTER :: nl_iterator
1964 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1965 598 : POINTER :: sab_nl, sab_nl_nosym
1966 : TYPE(qs_scf_env_type), POINTER :: scf_env
1967 :
1968 598 : NULLIFY (rho_desymm, kpoints, sab_nl_nosym, scf_env, matrix_ks, dft_control, &
1969 598 : sab_nl, nl_iterator, cell_to_index, pblock, pblock_desymm)
1970 :
1971 598 : CALL get_qs_env(qs_env, kpoints=kpoints, scf_env=scf_env, matrix_ks_kp=matrix_ks, dft_control=dft_control)
1972 598 : CALL get_kpoint_info(kpoints, sab_nl_nosym=sab_nl_nosym, cell_to_index=cell_to_index, sab_nl=sab_nl)
1973 :
1974 598 : IF (dft_control%do_admm) THEN
1975 342 : CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_kp=matrix_ks)
1976 : END IF
1977 :
1978 598 : nspins = SIZE(matrix_ks, 1)
1979 598 : nimg = ri_data%nimg
1980 :
1981 40100 : ALLOCATE (rho_desymm(nspins, nimg))
1982 17796 : DO i_img = 1, nimg
1983 38306 : DO i_spin = 1, nspins
1984 20510 : ALLOCATE (rho_desymm(i_spin, i_img)%matrix)
1985 : CALL dbcsr_create(rho_desymm(i_spin, i_img)%matrix, template=matrix_ks(i_spin, i_img)%matrix, &
1986 20510 : matrix_type=dbcsr_type_no_symmetry)
1987 37708 : CALL cp_dbcsr_alloc_block_from_nbl(rho_desymm(i_spin, i_img)%matrix, sab_nl_nosym)
1988 : END DO
1989 : END DO
1990 598 : CALL dbt_create(rho_desymm(1, 1)%matrix, tmp)
1991 :
1992 : !We transfor the symmtric typed (but not actually symmetric: P_ab^i = P_ba^-i) real-spaced density
1993 : !matrix into proper non-symemtric ones (using the same nl for consistency)
1994 598 : CALL neighbor_list_iterator_create(nl_iterator, sab_nl)
1995 28857 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
1996 28259 : CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
1997 28259 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
1998 28259 : IF (j_img > nimg .OR. j_img < 1) CYCLE
1999 :
2000 20879 : fac = 1.0_dp
2001 20879 : IF (iatom == jatom) fac = 0.5_dp
2002 20879 : mj_img = get_opp_index(j_img, qs_env)
2003 : !if no opposite image, then no sum of P^j + P^-j => need full diag
2004 20879 : IF (mj_img == 0) fac = 1.0_dp
2005 :
2006 20879 : irow = iatom
2007 20879 : icol = jatom
2008 20879 : IF (iatom > jatom) THEN
2009 : !because symmetric nl. Value for atom pair i,j is actually stored in j,i if i > j
2010 6933 : irow = jatom
2011 6933 : icol = iatom
2012 : END IF
2013 :
2014 46650 : DO i_spin = 1, nspins
2015 25173 : CALL dbcsr_get_block_p(rho_ao(i_spin, j_img)%matrix, irow, icol, pblock, found)
2016 25173 : IF (.NOT. found) CYCLE
2017 :
2018 : !distribution of symm and non-symm matrix match in that way
2019 25173 : CALL dbcsr_get_block_p(rho_desymm(i_spin, j_img)%matrix, iatom, jatom, pblock_desymm, found)
2020 25173 : IF (.NOT. found) CYCLE
2021 :
2022 103778 : IF (iatom > jatom) THEN
2023 768046 : pblock_desymm(:, :) = fac*TRANSPOSE(pblock(:, :))
2024 : ELSE
2025 1853980 : pblock_desymm(:, :) = fac*pblock(:, :)
2026 : END IF
2027 : END DO
2028 : END DO
2029 598 : CALL neighbor_list_iterator_release(nl_iterator)
2030 :
2031 17796 : DO i_img = 1, nimg
2032 38306 : DO i_spin = 1, nspins
2033 20510 : CALL dbt_scale(rho_ao_t(i_spin, i_img), scale_prev_p)
2034 :
2035 20510 : CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, i_img)%matrix, tmp)
2036 20510 : CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), summation=.TRUE., move_data=.TRUE.)
2037 :
2038 : !symmetrize by addin transpose of opp img
2039 20510 : mi_img = get_opp_index(i_img, qs_env)
2040 20510 : IF (mi_img > 0 .AND. mi_img <= nimg) THEN
2041 18326 : CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, mi_img)%matrix, tmp)
2042 18326 : CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), order=[2, 1], summation=.TRUE., move_data=.TRUE.)
2043 : END IF
2044 37708 : CALL dbt_filter(rho_ao_t(i_spin, i_img), ri_data%filter_eps)
2045 : END DO
2046 : END DO
2047 :
2048 17796 : DO i_img = 1, nimg
2049 38306 : DO i_spin = 1, nspins
2050 20510 : CALL dbcsr_release(rho_desymm(i_spin, i_img)%matrix)
2051 37708 : DEALLOCATE (rho_desymm(i_spin, i_img)%matrix)
2052 : END DO
2053 : END DO
2054 :
2055 598 : CALL dbt_destroy(tmp)
2056 598 : DEALLOCATE (rho_desymm)
2057 :
2058 1196 : END SUBROUTINE get_pmat_images
2059 :
2060 : ! **************************************************************************************************
2061 : !> \brief A routine that, given a cell index b and atom indices ij, returns a 2c tensor with the HFX
2062 : !> potential (P_i^0|Q_j^b), within the extended RI basis
2063 : !> \param t_2c_pot ...
2064 : !> \param mat_orig ...
2065 : !> \param atom_i ...
2066 : !> \param atom_j ...
2067 : !> \param img_b ...
2068 : !> \param ri_data ...
2069 : !> \param qs_env ...
2070 : !> \param do_inverse ...
2071 : !> \param para_env_ext ...
2072 : !> \param blacs_env_ext ...
2073 : !> \param dbcsr_template ...
2074 : !> \param off_diagonal ...
2075 : !> \param skip_inverse ...
2076 : ! **************************************************************************************************
2077 10369 : SUBROUTINE get_ext_2c_int(t_2c_pot, mat_orig, atom_i, atom_j, img_b, ri_data, qs_env, do_inverse, &
2078 : para_env_ext, blacs_env_ext, dbcsr_template, off_diagonal, skip_inverse)
2079 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_pot
2080 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_orig
2081 : INTEGER, INTENT(IN) :: atom_i, atom_j, img_b
2082 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2083 : TYPE(qs_environment_type), POINTER :: qs_env
2084 : LOGICAL, INTENT(IN), OPTIONAL :: do_inverse
2085 : TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env_ext
2086 : TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: blacs_env_ext
2087 : TYPE(dbcsr_type), OPTIONAL, POINTER :: dbcsr_template
2088 : LOGICAL, INTENT(IN), OPTIONAL :: off_diagonal, skip_inverse
2089 :
2090 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_ext_2c_int'
2091 :
2092 : INTEGER :: group, handle, handle2, i_img, i_RI, iatom, iblk, ikind, img_tot, j_img, j_RI, &
2093 : jatom, jblk, jkind, n_dependent, natom, nblks_RI, nimg, nkind
2094 10369 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2
2095 10369 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: present_atoms_i, present_atoms_j
2096 : INTEGER, DIMENSION(3) :: cell_b, cell_i, cell_j, cell_tot
2097 10369 : INTEGER, DIMENSION(:), POINTER :: col_dist, col_dist_ext, ri_blk_size_ext, &
2098 10369 : row_dist, row_dist_ext
2099 10369 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell, pgrid
2100 10369 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
2101 : LOGICAL :: do_inverse_prv, found, my_offd, &
2102 : skip_inverse_prv, use_template
2103 : REAL(dp) :: bfac, dij, r0, r1, threshold
2104 : REAL(dp), DIMENSION(3) :: ri, rij, rj, rref, scoord
2105 10369 : REAL(dp), DIMENSION(:, :), POINTER :: pblock
2106 : TYPE(cell_type), POINTER :: cell
2107 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
2108 : TYPE(dbcsr_distribution_type) :: dbcsr_dist, dbcsr_dist_ext
2109 : TYPE(dbcsr_iterator_type) :: dbcsr_iter
2110 : TYPE(dbcsr_type) :: work, work_tight, work_tight_inv
2111 72583 : TYPE(dbt_type) :: t_2c_tmp
2112 : TYPE(distribution_2d_type), POINTER :: dist_2d
2113 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2114 10369 : DIMENSION(:), TARGET :: basis_set_RI
2115 : TYPE(kpoint_type), POINTER :: kpoints
2116 : TYPE(mp_para_env_type), POINTER :: para_env
2117 : TYPE(neighbor_list_iterator_p_type), &
2118 10369 : DIMENSION(:), POINTER :: nl_iterator
2119 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2120 10369 : POINTER :: nl_2c
2121 10369 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2122 10369 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2123 :
2124 10369 : NULLIFY (qs_kind_set, nl_2c, nl_iterator, cell, kpoints, cell_to_index, index_to_cell, dist_2d, &
2125 10369 : para_env, pblock, blacs_env, particle_set, col_dist, row_dist, pgrid, &
2126 10369 : col_dist_ext, row_dist_ext)
2127 :
2128 10369 : CALL timeset(routineN, handle)
2129 :
2130 : !Idea: run over the neighbor list once for i and once for j, and record in which cell the MIC
2131 : ! atoms are. Then loop over the atoms and only take the pairs the we need
2132 :
2133 : CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
2134 10369 : kpoints=kpoints, para_env=para_env, blacs_env=blacs_env, particle_set=particle_set)
2135 10369 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
2136 :
2137 10369 : do_inverse_prv = .FALSE.
2138 10369 : IF (PRESENT(do_inverse)) do_inverse_prv = do_inverse
2139 352 : IF (do_inverse_prv) THEN
2140 352 : CPASSERT(atom_i == atom_j)
2141 : END IF
2142 :
2143 10369 : skip_inverse_prv = .FALSE.
2144 10369 : IF (PRESENT(skip_inverse)) skip_inverse_prv = skip_inverse
2145 :
2146 10369 : my_offd = .FALSE.
2147 10369 : IF (PRESENT(off_diagonal)) my_offd = off_diagonal
2148 :
2149 10369 : IF (PRESENT(para_env_ext)) para_env => para_env_ext
2150 10369 : IF (PRESENT(blacs_env_ext)) blacs_env => blacs_env_ext
2151 :
2152 10369 : nimg = SIZE(mat_orig)
2153 :
2154 10369 : CALL timeset(routineN//"_nl_iter", handle2)
2155 :
2156 : !create our own dist_2d in the subgroup
2157 41476 : ALLOCATE (dist1(natom), dist2(natom))
2158 31107 : DO iatom = 1, natom
2159 20738 : dist1(iatom) = MOD(iatom, blacs_env%num_pe(1))
2160 31107 : dist2(iatom) = MOD(iatom, blacs_env%num_pe(2))
2161 : END DO
2162 10369 : CALL distribution_2d_create(dist_2d, dist1, dist2, nkind, particle_set, blacs_env_ext=blacs_env)
2163 :
2164 46173 : ALLOCATE (basis_set_RI(nkind))
2165 10369 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
2166 :
2167 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
2168 10369 : "HFX_2c_nl_RI", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
2169 :
2170 62214 : ALLOCATE (present_atoms_i(natom, nimg), present_atoms_j(natom, nimg))
2171 1181257 : present_atoms_i = 0
2172 1181257 : present_atoms_j = 0
2173 :
2174 10369 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
2175 473197 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
2176 : CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, r=rij, cell=cell_j, &
2177 462828 : ikind=ikind, jkind=jkind)
2178 :
2179 1851312 : dij = NORM2(rij)
2180 :
2181 462828 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
2182 462828 : IF (j_img > nimg .OR. j_img < 1) CYCLE
2183 :
2184 460418 : IF (iatom == atom_i .AND. dij <= ri_data%kp_RI_range) present_atoms_i(jatom, j_img) = 1
2185 470787 : IF (iatom == atom_j .AND. dij <= ri_data%kp_RI_range) present_atoms_j(jatom, j_img) = 1
2186 : END DO
2187 10369 : CALL neighbor_list_iterator_release(nl_iterator)
2188 10369 : CALL release_neighbor_list_sets(nl_2c)
2189 10369 : CALL distribution_2d_release(dist_2d)
2190 10369 : CALL timestop(handle2)
2191 :
2192 10369 : CALL para_env%sum(present_atoms_i)
2193 10369 : CALL para_env%sum(present_atoms_j)
2194 :
2195 : !Need to build a work matrix with matching distribution to mat_orig
2196 : !If template is provided, use it. If not, we create it.
2197 10369 : use_template = .FALSE.
2198 10369 : IF (PRESENT(dbcsr_template)) THEN
2199 9541 : IF (ASSOCIATED(dbcsr_template)) use_template = .TRUE.
2200 : END IF
2201 :
2202 : IF (use_template) THEN
2203 9217 : CALL dbcsr_create(work, template=dbcsr_template)
2204 : ELSE
2205 1152 : CALL dbcsr_get_info(mat_orig(1), distribution=dbcsr_dist)
2206 1152 : CALL dbcsr_distribution_get(dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, pgrid=pgrid)
2207 4608 : ALLOCATE (row_dist_ext(ri_data%ncell_RI*natom), col_dist_ext(ri_data%ncell_RI*natom))
2208 2304 : ALLOCATE (ri_blk_size_ext(ri_data%ncell_RI*natom))
2209 8220 : DO i_RI = 1, ri_data%ncell_RI
2210 35340 : row_dist_ext((i_RI - 1)*natom + 1:i_RI*natom) = row_dist(:)
2211 35340 : col_dist_ext((i_RI - 1)*natom + 1:i_RI*natom) = col_dist(:)
2212 22356 : RI_blk_size_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
2213 : END DO
2214 :
2215 : CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2216 1152 : row_dist=row_dist_ext, col_dist=col_dist_ext)
2217 : CALL dbcsr_create(work, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2218 1152 : row_blk_size=RI_blk_size_ext, col_blk_size=RI_blk_size_ext)
2219 1152 : CALL dbcsr_distribution_release(dbcsr_dist_ext)
2220 1152 : DEALLOCATE (col_dist_ext, row_dist_ext, RI_blk_size_ext)
2221 :
2222 3456 : IF (PRESENT(dbcsr_template)) THEN
2223 324 : ALLOCATE (dbcsr_template)
2224 324 : CALL dbcsr_create(dbcsr_template, template=work)
2225 : END IF
2226 : END IF !use_template
2227 :
2228 41476 : cell_b(:) = index_to_cell(:, img_b)
2229 400665 : DO i_img = 1, nimg
2230 390296 : i_RI = ri_data%img_to_RI_cell(i_img)
2231 390296 : IF (i_RI == 0) CYCLE
2232 262120 : cell_i(:) = index_to_cell(:, i_img)
2233 3038369 : DO j_img = 1, nimg
2234 2962470 : j_RI = ri_data%img_to_RI_cell(j_img)
2235 2962470 : IF (j_RI == 0) CYCLE
2236 2207920 : cell_j(:) = index_to_cell(:, j_img)
2237 2207920 : cell_tot = cell_j - cell_i + cell_b
2238 :
2239 3818385 : IF (ANY([cell_tot(1), cell_tot(2), cell_tot(3)] < LBOUND(cell_to_index)) .OR. &
2240 : ANY([cell_tot(1), cell_tot(2), cell_tot(3)] > UBOUND(cell_to_index))) CYCLE
2241 512599 : img_tot = cell_to_index(cell_tot(1), cell_tot(2), cell_tot(3))
2242 512599 : IF (img_tot > nimg .OR. img_tot < 1) CYCLE
2243 :
2244 352501 : CALL dbcsr_iterator_start(dbcsr_iter, mat_orig(img_tot))
2245 1004091 : DO WHILE (dbcsr_iterator_blocks_left(dbcsr_iter))
2246 651590 : CALL dbcsr_iterator_next_block(dbcsr_iter, row=iatom, column=jatom)
2247 651590 : IF (present_atoms_i(iatom, i_img) == 0) CYCLE
2248 252289 : IF (present_atoms_j(jatom, j_img) == 0) CYCLE
2249 111628 : IF (my_offd .AND. (i_RI - 1)*natom + iatom == (j_RI - 1)*natom + jatom) CYCLE
2250 :
2251 111257 : CALL dbcsr_get_block_p(mat_orig(img_tot), iatom, jatom, pblock, found)
2252 111257 : IF (.NOT. found) CYCLE
2253 :
2254 1004091 : CALL dbcsr_put_block(work, (i_RI - 1)*natom + iatom, (j_RI - 1)*natom + jatom, pblock)
2255 :
2256 : END DO
2257 3665886 : CALL dbcsr_iterator_stop(dbcsr_iter)
2258 :
2259 : END DO !j_img
2260 : END DO !i_img
2261 10369 : CALL dbcsr_finalize(work)
2262 :
2263 10369 : IF (do_inverse_prv) THEN
2264 :
2265 352 : r1 = ri_data%kp_RI_range
2266 352 : r0 = ri_data%kp_bump_rad
2267 :
2268 : !Because there are a lot of empty rows/cols in work, we need to get rid of them for inversion
2269 28600 : nblks_RI = SUM(present_atoms_i)
2270 1760 : ALLOCATE (col_dist_ext(nblks_RI), row_dist_ext(nblks_RI), RI_blk_size_ext(nblks_RI))
2271 352 : iblk = 0
2272 9768 : DO i_img = 1, nimg
2273 9416 : i_RI = ri_data%img_to_RI_cell(i_img)
2274 9416 : IF (i_RI == 0) CYCLE
2275 7000 : DO iatom = 1, natom
2276 4432 : IF (present_atoms_i(iatom, i_img) == 0) CYCLE
2277 1484 : iblk = iblk + 1
2278 1484 : col_dist_ext(iblk) = col_dist(iatom)
2279 1484 : row_dist_ext(iblk) = row_dist(iatom)
2280 13848 : RI_blk_size_ext(iblk) = ri_data%bsizes_RI(iatom)
2281 : END DO
2282 : END DO
2283 :
2284 : CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2285 352 : row_dist=row_dist_ext, col_dist=col_dist_ext)
2286 : CALL dbcsr_create(work_tight, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2287 352 : row_blk_size=RI_blk_size_ext, col_blk_size=RI_blk_size_ext)
2288 : CALL dbcsr_create(work_tight_inv, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2289 352 : row_blk_size=RI_blk_size_ext, col_blk_size=RI_blk_size_ext)
2290 352 : CALL dbcsr_distribution_release(dbcsr_dist_ext)
2291 352 : DEALLOCATE (col_dist_ext, row_dist_ext, RI_blk_size_ext)
2292 :
2293 : !We apply a bump function to the RI metric inverse for smooth RI basis extension:
2294 : ! S^-1 = B * ((P|Q)_D + B*(P|Q)_OD*B)^-1 * B, with D block-diagonal blocks and OD off-diagonal
2295 352 : rref = pbc(particle_set(atom_i)%r, cell)
2296 :
2297 352 : iblk = 0
2298 9768 : DO i_img = 1, nimg
2299 9416 : i_RI = ri_data%img_to_RI_cell(i_img)
2300 9416 : IF (i_RI == 0) CYCLE
2301 7000 : DO iatom = 1, natom
2302 4432 : IF (present_atoms_i(iatom, i_img) == 0) CYCLE
2303 1484 : iblk = iblk + 1
2304 :
2305 1484 : CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
2306 5936 : CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
2307 :
2308 1484 : jblk = 0
2309 58868 : DO j_img = 1, nimg
2310 47968 : j_RI = ri_data%img_to_RI_cell(j_img)
2311 47968 : IF (j_RI == 0) CYCLE
2312 37816 : DO jatom = 1, natom
2313 22256 : IF (present_atoms_j(jatom, j_img) == 0) CYCLE
2314 7236 : jblk = jblk + 1
2315 :
2316 7236 : CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
2317 28944 : CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
2318 :
2319 7236 : CALL dbcsr_get_block_p(work, (i_RI - 1)*natom + iatom, (j_RI - 1)*natom + jatom, pblock, found)
2320 7236 : IF (.NOT. found) CYCLE
2321 :
2322 3216 : bfac = 1.0_dp
2323 18060 : IF (iblk /= jblk) bfac = bump(NORM2(ri - rref), r0, r1)*bump(NORM2(rj - rref), r0, r1)
2324 6082128 : CALL dbcsr_put_block(work_tight, iblk, jblk, bfac*pblock(:, :))
2325 : END DO
2326 : END DO
2327 : END DO
2328 : END DO
2329 352 : CALL dbcsr_finalize(work_tight)
2330 352 : CALL dbcsr_clear(work)
2331 :
2332 352 : IF (.NOT. skip_inverse_prv) THEN
2333 176 : SELECT CASE (ri_data%t2c_method)
2334 : CASE (hfx_ri_do_2c_iter)
2335 0 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
2336 0 : CALL invert_hotelling(work_tight_inv, work_tight, threshold=threshold, silent=.FALSE.)
2337 : CASE (hfx_ri_do_2c_cholesky)
2338 176 : CALL dbcsr_copy(work_tight_inv, work_tight)
2339 176 : CALL cp_dbcsr_cholesky_decompose(work_tight_inv, para_env=para_env, blacs_env=blacs_env)
2340 : CALL cp_dbcsr_cholesky_invert(work_tight_inv, para_env=para_env, blacs_env=blacs_env, &
2341 176 : uplo_to_full=.TRUE.)
2342 : CASE (hfx_ri_do_2c_diag)
2343 0 : CALL dbcsr_copy(work_tight_inv, work_tight)
2344 : CALL cp_dbcsr_power(work_tight_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
2345 176 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
2346 : END SELECT
2347 : ELSE
2348 176 : CALL dbcsr_copy(work_tight_inv, work_tight)
2349 : END IF
2350 :
2351 : !move back data to standard extended RI pattern
2352 : !Note: we apply the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 later, because this matrix
2353 : ! is required for forces
2354 352 : iblk = 0
2355 9768 : DO i_img = 1, nimg
2356 9416 : i_RI = ri_data%img_to_RI_cell(i_img)
2357 9416 : IF (i_RI == 0) CYCLE
2358 7000 : DO iatom = 1, natom
2359 4432 : IF (present_atoms_i(iatom, i_img) == 0) CYCLE
2360 1484 : iblk = iblk + 1
2361 :
2362 1484 : jblk = 0
2363 58868 : DO j_img = 1, nimg
2364 47968 : j_RI = ri_data%img_to_RI_cell(j_img)
2365 47968 : IF (j_RI == 0) CYCLE
2366 37816 : DO jatom = 1, natom
2367 22256 : IF (present_atoms_j(jatom, j_img) == 0) CYCLE
2368 7236 : jblk = jblk + 1
2369 :
2370 7236 : CALL dbcsr_get_block_p(work_tight_inv, iblk, jblk, pblock, found)
2371 7236 : IF (.NOT. found) CYCLE
2372 :
2373 73641 : CALL dbcsr_put_block(work, (i_RI - 1)*natom + iatom, (j_RI - 1)*natom + jatom, pblock)
2374 : END DO
2375 : END DO
2376 : END DO
2377 : END DO
2378 352 : CALL dbcsr_finalize(work)
2379 :
2380 352 : CALL dbcsr_release(work_tight)
2381 704 : CALL dbcsr_release(work_tight_inv)
2382 : END IF
2383 :
2384 10369 : CALL dbt_create(work, t_2c_tmp)
2385 10369 : CALL dbt_copy_matrix_to_tensor(work, t_2c_tmp)
2386 10369 : CALL dbt_copy(t_2c_tmp, t_2c_pot, move_data=.TRUE.)
2387 10369 : CALL dbt_filter(t_2c_pot, ri_data%filter_eps)
2388 :
2389 10369 : CALL dbt_destroy(t_2c_tmp)
2390 10369 : CALL dbcsr_release(work)
2391 :
2392 10369 : CALL timestop(handle)
2393 :
2394 41476 : END SUBROUTINE get_ext_2c_int
2395 :
2396 : ! **************************************************************************************************
2397 : !> \brief Pre-contract the density matrices with the 3-center integrals:
2398 : !> P_sigma^a,lambda^a+c (mu^0 sigma^a| P^0)
2399 : !> \param t_3c_apc ...
2400 : !> \param rho_ao_t ...
2401 : !> \param ri_data ...
2402 : !> \param qs_env ...
2403 : ! **************************************************************************************************
2404 324 : SUBROUTINE contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
2405 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, rho_ao_t
2406 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2407 : TYPE(qs_environment_type), POINTER :: qs_env
2408 :
2409 : CHARACTER(len=*), PARAMETER :: routineN = 'contract_pmat_3c'
2410 :
2411 : INTEGER :: apc_img, b_img, batch_size, handle, &
2412 : i_batch, i_img, i_spin, idx, j_batch, &
2413 : n_batch_img, n_batch_nze, nimg, &
2414 : nimg_nze, nspins
2415 : INTEGER(int_8) :: nflop, nze
2416 324 : INTEGER, ALLOCATABLE, DIMENSION(:) :: apc_filter, batch_ranges_img, &
2417 324 : batch_ranges_nze, int_indices
2418 324 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ac_pairs, iapc_pairs
2419 : REAL(dp) :: occ, t1, t2
2420 2916 : TYPE(dbt_type) :: t_3c_tmp
2421 324 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ints_stack, res_stack, rho_stack
2422 : TYPE(dft_control_type), POINTER :: dft_control
2423 :
2424 324 : CALL timeset(routineN, handle)
2425 :
2426 324 : CALL get_qs_env(qs_env, dft_control=dft_control)
2427 :
2428 324 : nimg = ri_data%nimg
2429 324 : nimg_nze = ri_data%nimg_nze
2430 324 : nspins = dft_control%nspins
2431 :
2432 324 : CALL dbt_create(t_3c_apc(1, 1), t_3c_tmp)
2433 :
2434 324 : batch_size = ri_data%kp_stack_size
2435 :
2436 1620 : ALLOCATE (apc_filter(nimg), iapc_pairs(nimg, 2))
2437 9634 : apc_filter = 0
2438 9634 : DO b_img = 1, nimg
2439 9310 : CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env)
2440 265064 : DO i_img = 1, nimg_nze
2441 255430 : idx = iapc_pairs(i_img, 2)
2442 255430 : IF (idx < 1 .OR. idx > nimg) CYCLE
2443 264740 : apc_filter(idx) = 1
2444 : END DO
2445 : END DO
2446 :
2447 : !batching over all images
2448 324 : n_batch_img = nimg/batch_size
2449 324 : IF (MODULO(nimg, batch_size) /= 0) n_batch_img = n_batch_img + 1
2450 972 : ALLOCATE (batch_ranges_img(n_batch_img + 1))
2451 1000 : DO i_batch = 1, n_batch_img
2452 1000 : batch_ranges_img(i_batch) = (i_batch - 1)*batch_size + 1
2453 : END DO
2454 324 : batch_ranges_img(n_batch_img + 1) = nimg + 1
2455 :
2456 : !batching over images with non-zero 3c integrals
2457 324 : n_batch_nze = nimg_nze/batch_size
2458 324 : IF (MODULO(nimg_nze, batch_size) /= 0) n_batch_nze = n_batch_nze + 1
2459 972 : ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
2460 796 : DO i_batch = 1, n_batch_nze
2461 796 : batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
2462 : END DO
2463 324 : batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
2464 :
2465 : !Create the stack tensors in the approriate distribution
2466 10044 : ALLOCATE (rho_stack(2), ints_stack(2), res_stack(2))
2467 : CALL get_stack_tensors(res_stack, rho_stack, ints_stack, rho_ao_t(1, 1), &
2468 324 : ri_data%t_3c_int_ctr_1(1, 1), batch_size, ri_data, qs_env)
2469 :
2470 1296 : ALLOCATE (ac_pairs(nimg, 2), int_indices(nimg_nze))
2471 6874 : DO i_img = 1, nimg_nze
2472 6874 : int_indices(i_img) = i_img
2473 : END DO
2474 :
2475 324 : t1 = m_walltime()
2476 796 : DO j_batch = 1, n_batch_nze
2477 : !First batch is over the integrals. They are always in the same order, consistent with get_ac_pairs
2478 : CALL fill_3c_stack(ints_stack(1), ri_data%t_3c_int_ctr_1(1, :), int_indices, 3, ri_data, &
2479 1416 : img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)])
2480 472 : CALL dbt_copy(ints_stack(1), ints_stack(2), move_data=.TRUE.)
2481 :
2482 1402 : DO i_spin = 1, nspins
2483 2568 : DO i_batch = 1, n_batch_img
2484 : !Second batch is over the P matrix. Here we fill the stacked rho tensors col by col
2485 21170 : DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2486 19680 : IF (apc_filter(apc_img) == 0) CYCLE
2487 19680 : CALL get_ac_pairs(ac_pairs, apc_img, ri_data, qs_env)
2488 : CALL fill_2c_stack(rho_stack(1), rho_ao_t(i_spin, :), ac_pairs(:, 2), 1, ri_data, &
2489 : img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)], &
2490 60530 : shift=apc_img - batch_ranges_img(i_batch) + 1)
2491 :
2492 : END DO !apc_img
2493 1490 : CALL get_tensor_occupancy(rho_stack(1), nze, occ)
2494 1490 : IF (nze == 0) CYCLE
2495 1466 : CALL dbt_copy(rho_stack(1), rho_stack(2), move_data=.TRUE.)
2496 :
2497 : !The actual contraction
2498 1466 : CALL dbt_batched_contract_init(rho_stack(2))
2499 : CALL dbt_contract(1.0_dp, ints_stack(2), rho_stack(2), &
2500 : 0.0_dp, res_stack(2), map_1=[1, 2], map_2=[3], &
2501 : contract_1=[3], notcontract_1=[1, 2], &
2502 : contract_2=[1], notcontract_2=[2], &
2503 1466 : filter_eps=ri_data%filter_eps, flop=nflop)
2504 1466 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2505 1466 : CALL dbt_batched_contract_finalize(rho_stack(2))
2506 1466 : CALL dbt_copy(res_stack(2), res_stack(1), move_data=.TRUE.)
2507 :
2508 23006 : DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2509 : !Destack the resulting tensor and put it in t_3c_apc with correct apc_img
2510 19444 : IF (apc_filter(apc_img) == 0) CYCLE
2511 19444 : CALL unstack_t_3c_apc(t_3c_tmp, res_stack(1), apc_img - batch_ranges_img(i_batch) + 1)
2512 20934 : CALL dbt_copy(t_3c_tmp, t_3c_apc(i_spin, apc_img), summation=.TRUE., move_data=.TRUE.)
2513 : END DO
2514 :
2515 : END DO !i_batch
2516 : END DO !i_spin
2517 : END DO !j_batch
2518 324 : DEALLOCATE (batch_ranges_img)
2519 324 : DEALLOCATE (batch_ranges_nze)
2520 324 : t2 = m_walltime()
2521 324 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2522 :
2523 324 : CALL dbt_destroy(rho_stack(1))
2524 324 : CALL dbt_destroy(rho_stack(2))
2525 324 : CALL dbt_destroy(ints_stack(1))
2526 324 : CALL dbt_destroy(ints_stack(2))
2527 324 : CALL dbt_destroy(res_stack(1))
2528 324 : CALL dbt_destroy(res_stack(2))
2529 324 : CALL dbt_destroy(t_3c_tmp)
2530 :
2531 324 : CALL timestop(handle)
2532 :
2533 3240 : END SUBROUTINE contract_pmat_3c
2534 :
2535 : ! **************************************************************************************************
2536 : !> \brief Pre-contract 3-center integrals with the bumped invrse RI metric, for each atom
2537 : !> \param t_3c_int ...
2538 : !> \param ri_data ...
2539 : !> \param qs_env ...
2540 : ! **************************************************************************************************
2541 88 : SUBROUTINE precontract_3c_ints(t_3c_int, ri_data, qs_env)
2542 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_int
2543 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2544 : TYPE(qs_environment_type), POINTER :: qs_env
2545 :
2546 : CHARACTER(len=*), PARAMETER :: routineN = 'precontract_3c_ints'
2547 :
2548 : INTEGER :: batch_size, handle, i_batch, i_img, &
2549 : i_RI, iatom, is, n_batch, natom, &
2550 : nblks, nblks_3c(3), nimg
2551 : INTEGER(int_8) :: nflop
2552 88 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges, bsizes_RI_ext, bsizes_RI_ext_split, &
2553 88 : bsizes_stack, dist1, dist2, dist3, dist_stack3, idx_to_at_AO, int_indices
2554 792 : TYPE(dbt_distribution_type) :: t_dist
2555 18392 : TYPE(dbt_type) :: t_2c_RI_tmp(2), t_3c_tmp(3)
2556 :
2557 88 : CALL timeset(routineN, handle)
2558 :
2559 88 : CALL get_qs_env(qs_env, natom=natom)
2560 :
2561 88 : nimg = ri_data%nimg
2562 264 : ALLOCATE (int_indices(nimg))
2563 2442 : DO i_img = 1, nimg
2564 2442 : int_indices(i_img) = i_img
2565 : END DO
2566 :
2567 264 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
2568 88 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2569 :
2570 88 : nblks = SIZE(ri_data%bsizes_RI_split)
2571 264 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*natom))
2572 264 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks))
2573 642 : DO i_RI = 1, ri_data%ncell_RI
2574 1662 : bsizes_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
2575 3264 : bsizes_RI_ext_split((i_RI - 1)*nblks + 1:i_RI*nblks) = ri_data%bsizes_RI_split(:)
2576 : END DO
2577 : CALL create_2c_tensor(t_2c_RI_tmp(1), dist1, dist2, ri_data%pgrid_2d, &
2578 : bsizes_RI_ext, bsizes_RI_ext, &
2579 : name="(RI | RI)")
2580 88 : DEALLOCATE (dist1, dist2)
2581 : CALL create_2c_tensor(t_2c_RI_tmp(2), dist1, dist2, ri_data%pgrid_2d, &
2582 : bsizes_RI_ext_split, bsizes_RI_ext_split, &
2583 : name="(RI | RI)")
2584 88 : DEALLOCATE (dist1, dist2)
2585 :
2586 : !For more efficiency, we stack multiple images of the 3-center integrals into a single tensor
2587 88 : batch_size = ri_data%kp_stack_size
2588 88 : n_batch = nimg/batch_size
2589 88 : IF (MODULO(nimg, batch_size) /= 0) n_batch = n_batch + 1
2590 264 : ALLOCATE (batch_ranges(n_batch + 1))
2591 268 : DO i_batch = 1, n_batch
2592 268 : batch_ranges(i_batch) = (i_batch - 1)*batch_size + 1
2593 : END DO
2594 88 : batch_ranges(n_batch + 1) = nimg + 1
2595 :
2596 88 : nblks = SIZE(ri_data%bsizes_AO_split)
2597 264 : ALLOCATE (bsizes_stack(batch_size*nblks))
2598 1592 : DO is = 1, batch_size
2599 7320 : bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
2600 : END DO
2601 :
2602 88 : CALL dbt_get_info(t_3c_int(1, 1), nblks_total=nblks_3c)
2603 792 : ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)), dist_stack3(batch_size*nblks_3c(3)))
2604 88 : CALL dbt_get_info(t_3c_int(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, proc_dist_3=dist3)
2605 1592 : DO is = 1, batch_size
2606 7320 : dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
2607 : END DO
2608 :
2609 88 : CALL dbt_distribution_new(t_dist, ri_data%pgrid, dist1, dist2, dist_stack3)
2610 : CALL dbt_create(t_3c_tmp(1), "ints_stack", t_dist, [1], [2, 3], bsizes_RI_ext_split, &
2611 88 : ri_data%bsizes_AO_split, bsizes_stack)
2612 88 : CALL dbt_distribution_destroy(t_dist)
2613 88 : DEALLOCATE (dist1, dist2, dist3, dist_stack3)
2614 :
2615 88 : CALL dbt_create(t_3c_tmp(1), t_3c_tmp(2))
2616 88 : CALL dbt_create(t_3c_int(1, 1), t_3c_tmp(3))
2617 :
2618 264 : DO iatom = 1, natom
2619 176 : CALL dbt_copy(ri_data%t_2c_inv(1, iatom), t_2c_RI_tmp(1))
2620 176 : CALL apply_bump(t_2c_RI_tmp(1), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.TRUE.)
2621 176 : CALL dbt_copy(t_2c_RI_tmp(1), t_2c_RI_tmp(2), move_data=.TRUE.)
2622 :
2623 176 : CALL dbt_batched_contract_init(t_2c_RI_tmp(2))
2624 536 : DO i_batch = 1, n_batch
2625 :
2626 : CALL fill_3c_stack(t_3c_tmp(1), t_3c_int(1, :), int_indices, 3, ri_data, &
2627 : img_bounds=[batch_ranges(i_batch), batch_ranges(i_batch + 1)], &
2628 1080 : filter_at=iatom, filter_dim=2, idx_to_at=idx_to_at_AO)
2629 :
2630 : CALL dbt_contract(1.0_dp, t_2c_RI_tmp(2), t_3c_tmp(1), &
2631 : 0.0_dp, t_3c_tmp(2), map_1=[1], map_2=[2, 3], &
2632 : contract_1=[2], notcontract_1=[1], &
2633 : contract_2=[1], notcontract_2=[2, 3], &
2634 360 : filter_eps=ri_data%filter_eps, flop=nflop)
2635 360 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2636 :
2637 5068 : DO i_img = batch_ranges(i_batch), batch_ranges(i_batch + 1) - 1
2638 4708 : CALL unstack_t_3c_apc(t_3c_tmp(3), t_3c_tmp(2), i_img - batch_ranges(i_batch) + 1)
2639 : CALL dbt_copy(t_3c_tmp(3), ri_data%t_3c_int_ctr_1(1, i_img), summation=.TRUE., &
2640 5068 : order=[2, 1, 3], move_data=.TRUE.)
2641 : END DO
2642 536 : CALL dbt_clear(t_3c_tmp(1))
2643 : END DO
2644 264 : CALL dbt_batched_contract_finalize(t_2c_RI_tmp(2))
2645 :
2646 : END DO
2647 88 : CALL dbt_destroy(t_2c_RI_tmp(1))
2648 88 : CALL dbt_destroy(t_2c_RI_tmp(2))
2649 88 : CALL dbt_destroy(t_3c_tmp(1))
2650 88 : CALL dbt_destroy(t_3c_tmp(2))
2651 88 : CALL dbt_destroy(t_3c_tmp(3))
2652 :
2653 2442 : DO i_img = 1, nimg
2654 2442 : CALL dbt_destroy(t_3c_int(1, i_img))
2655 : END DO
2656 :
2657 88 : CALL timestop(handle)
2658 :
2659 440 : END SUBROUTINE precontract_3c_ints
2660 :
2661 : ! **************************************************************************************************
2662 : !> \brief Copy the data of a 2D tensor living in the main MPI group to a sub-group, given the proc
2663 : !> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2664 : !> \param t2c_sub ...
2665 : !> \param t2c_main ...
2666 : !> \param group_size ...
2667 : !> \param ngroups ...
2668 : !> \param para_env ...
2669 : ! **************************************************************************************************
2670 11138 : SUBROUTINE copy_2c_to_subgroup(t2c_sub, t2c_main, group_size, ngroups, para_env)
2671 : TYPE(dbt_type), INTENT(INOUT) :: t2c_sub, t2c_main
2672 : INTEGER, INTENT(IN) :: group_size, ngroups
2673 : TYPE(mp_para_env_type), POINTER :: para_env
2674 :
2675 : INTEGER :: batch_size, i, i_batch, i_msg, iblk, &
2676 : igroup, iproc, ir, is, jblk, n_batch, &
2677 : nocc, tag
2678 11138 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2
2679 11138 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: block_dest, block_source
2680 11138 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: current_dest
2681 : INTEGER, DIMENSION(2) :: ind, nblks
2682 : LOGICAL :: found
2683 11138 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
2684 11138 : TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2685 : TYPE(dbt_iterator_type) :: iter
2686 11138 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2687 :
2688 : !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2689 : ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2690 : ! to have blocks pre-reserved though
2691 :
2692 11138 : CALL dbt_get_info(t2c_main, nblks_total=nblks)
2693 :
2694 : !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2695 44552 : ALLOCATE (block_source(nblks(1), nblks(2)))
2696 219202 : block_source = -1
2697 11138 : nocc = 0
2698 11138 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t2c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2699 : CALL dbt_iterator_start(iter, t2c_main)
2700 : DO WHILE (dbt_iterator_blocks_left(iter))
2701 : CALL dbt_iterator_next_block(iter, ind)
2702 : CALL dbt_get_block(t2c_main, ind, blk, found)
2703 : IF (.NOT. found) CYCLE
2704 :
2705 : block_source(ind(1), ind(2)) = para_env%mepos
2706 : !$OMP ATOMIC
2707 : nocc = nocc + 1
2708 : DEALLOCATE (blk)
2709 : END DO
2710 : CALL dbt_iterator_stop(iter)
2711 : !$OMP END PARALLEL
2712 :
2713 11138 : CALL para_env%sum(nocc)
2714 11138 : CALL para_env%sum(block_source)
2715 219202 : block_source = block_source + para_env%num_pe - 1
2716 11138 : IF (nocc == 0) RETURN
2717 :
2718 : !Loop over the sub tensor, get the block destination
2719 10810 : igroup = para_env%mepos/group_size
2720 32430 : ALLOCATE (block_dest(nblks(1), nblks(2)))
2721 216906 : block_dest = -1
2722 40266 : DO jblk = 1, nblks(2)
2723 216906 : DO iblk = 1, nblks(1)
2724 176640 : IF (block_source(iblk, jblk) == -1) CYCLE
2725 :
2726 135642 : CALL dbt_get_stored_coordinates(t2c_sub, [iblk, jblk], iproc)
2727 206096 : block_dest(iblk, jblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2728 : END DO
2729 : END DO
2730 :
2731 54050 : ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)))
2732 10810 : CALL dbt_get_info(t2c_main, blk_size_1=bsizes1, blk_size_2=bsizes2)
2733 :
2734 54050 : ALLOCATE (current_dest(nblks(1), nblks(2), 0:ngroups - 1))
2735 32430 : DO igroup = 0, ngroups - 1
2736 : !for a given subgroup, need to make the destination available to everyone in the main group
2737 433812 : current_dest(:, :, igroup) = block_dest(:, :)
2738 32430 : CALL para_env%bcast(current_dest(:, :, igroup), source=igroup*group_size) !bcast from first proc in sub-group
2739 : END DO
2740 :
2741 : !We go by batches, which cannot be larger than the maximum MPI tag value
2742 10810 : batch_size = MIN(para_env%get_tag_ub(), 128000, nocc*ngroups)
2743 10810 : n_batch = (nocc*ngroups)/batch_size
2744 10810 : IF (MODULO(nocc*ngroups, batch_size) /= 0) n_batch = n_batch + 1
2745 :
2746 21620 : DO i_batch = 1, n_batch
2747 : !Loop over groups, blocks and send/receive
2748 224096 : ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2749 224096 : ALLOCATE (send_req(batch_size), recv_req(batch_size))
2750 : ir = 0
2751 : is = 0
2752 : i_msg = 0
2753 40266 : DO jblk = 1, nblks(2)
2754 216906 : DO iblk = 1, nblks(1)
2755 559376 : DO igroup = 0, ngroups - 1
2756 353280 : IF (block_source(iblk, jblk) == -1) CYCLE
2757 :
2758 90428 : i_msg = i_msg + 1
2759 90428 : IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) CYCLE
2760 :
2761 : !a unique tag per block, within this batch
2762 90428 : tag = i_msg - (i_batch - 1)*batch_size
2763 :
2764 90428 : found = .FALSE.
2765 90428 : IF (para_env%mepos == block_source(iblk, jblk)) THEN
2766 135642 : CALL dbt_get_block(t2c_main, [iblk, jblk], blk, found)
2767 : END IF
2768 :
2769 : !If blocks live on same proc, simply copy. Else MPI send/recv
2770 90428 : IF (block_source(iblk, jblk) == current_dest(iblk, jblk, igroup)) THEN
2771 135642 : IF (found) CALL dbt_put_block(t2c_sub, [iblk, jblk], SHAPE(blk), blk)
2772 : ELSE
2773 45214 : IF (para_env%mepos == block_source(iblk, jblk) .AND. found) THEN
2774 90428 : ALLOCATE (send_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2775 23151133 : send_buff(tag)%array(:, :) = blk(:, :)
2776 22607 : is = is + 1
2777 : CALL para_env%isend(msgin=send_buff(tag)%array, dest=current_dest(iblk, jblk, igroup), &
2778 22607 : request=send_req(is), tag=tag)
2779 : END IF
2780 :
2781 45214 : IF (para_env%mepos == current_dest(iblk, jblk, igroup)) THEN
2782 90428 : ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2783 22607 : ir = ir + 1
2784 : CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk), &
2785 22607 : request=recv_req(ir), tag=tag)
2786 : END IF
2787 : END IF
2788 :
2789 267068 : IF (found) DEALLOCATE (blk)
2790 : END DO
2791 : END DO
2792 : END DO
2793 :
2794 10810 : CALL mp_waitall(send_req(1:is))
2795 10810 : CALL mp_waitall(recv_req(1:ir))
2796 : !clean-up
2797 101238 : DO i = 1, batch_size
2798 101238 : IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
2799 : END DO
2800 :
2801 : !Finally copy the data from the buffer to the sub-tensor
2802 : i_msg = 0
2803 40266 : DO jblk = 1, nblks(2)
2804 216906 : DO iblk = 1, nblks(1)
2805 559376 : DO igroup = 0, ngroups - 1
2806 353280 : IF (block_source(iblk, jblk) == -1) CYCLE
2807 :
2808 90428 : i_msg = i_msg + 1
2809 90428 : IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) CYCLE
2810 :
2811 : !a unique tag per block, within this batch
2812 90428 : tag = i_msg - (i_batch - 1)*batch_size
2813 :
2814 90428 : IF (para_env%mepos == current_dest(iblk, jblk, igroup) .AND. &
2815 176640 : block_source(iblk, jblk) /= current_dest(iblk, jblk, igroup)) THEN
2816 :
2817 90428 : ALLOCATE (blk(bsizes1(iblk), bsizes2(jblk)))
2818 23151133 : blk(:, :) = recv_buff(tag)%array(:, :)
2819 113035 : CALL dbt_put_block(t2c_sub, [iblk, jblk], SHAPE(blk), blk)
2820 22607 : DEALLOCATE (blk)
2821 : END IF
2822 : END DO
2823 : END DO
2824 : END DO
2825 :
2826 : !clean-up
2827 101238 : DO i = 1, batch_size
2828 101238 : IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
2829 : END DO
2830 21620 : DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
2831 : END DO !i_batch
2832 10810 : CALL dbt_finalize(t2c_sub)
2833 :
2834 22276 : END SUBROUTINE copy_2c_to_subgroup
2835 :
2836 : ! **************************************************************************************************
2837 : !> \brief Pre-compute the destination of the block of a 3D tensor in various subgroups
2838 : !> \param subgroup_dest ...
2839 : !> \param t3c_sub ...
2840 : !> \param t3c_main ...
2841 : !> \param group_size ...
2842 : !> \param ngroups ...
2843 : !> \param para_env ...
2844 : ! **************************************************************************************************
2845 648 : SUBROUTINE get_3c_subgroup_dest(subgroup_dest, t3c_sub, t3c_main, group_size, ngroups, para_env)
2846 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :), &
2847 : INTENT(INOUT) :: subgroup_dest
2848 : TYPE(dbt_type), INTENT(INOUT) :: t3c_sub, t3c_main
2849 : INTEGER, INTENT(IN) :: group_size, ngroups
2850 : TYPE(mp_para_env_type), POINTER :: para_env
2851 :
2852 : INTEGER :: iblk, igroup, iproc, jblk, kblk
2853 648 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: block_dest
2854 : INTEGER, DIMENSION(3) :: nblks
2855 :
2856 648 : CALL dbt_get_info(t3c_main, nblks_total=nblks)
2857 :
2858 : !Loop over the sub tensor, get the block destination
2859 648 : igroup = para_env%mepos/group_size
2860 3240 : ALLOCATE (block_dest(nblks(1), nblks(2), nblks(3)))
2861 1944 : DO kblk = 1, nblks(3)
2862 10632 : DO jblk = 1, nblks(2)
2863 39552 : DO iblk = 1, nblks(1)
2864 118272 : CALL dbt_get_stored_coordinates(t3c_sub, [iblk, jblk, kblk], iproc)
2865 38256 : block_dest(iblk, jblk, kblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2866 : END DO
2867 : END DO
2868 : END DO
2869 :
2870 3888 : ALLOCATE (subgroup_dest(nblks(1), nblks(2), nblks(3), ngroups))
2871 1944 : DO igroup = 0, ngroups - 1
2872 : !for a given subgroup, need to make the destination available to everyone in the main group
2873 80400 : subgroup_dest(:, :, :, igroup + 1) = block_dest(:, :, :)
2874 1944 : CALL para_env%bcast(subgroup_dest(:, :, :, igroup + 1), source=igroup*group_size) !bcast from first proc in subgroup
2875 : END DO
2876 :
2877 648 : END SUBROUTINE get_3c_subgroup_dest
2878 :
2879 : ! **************************************************************************************************
2880 : !> \brief Copy the data of a 3D tensor living in the main MPI group to a sub-group, given the proc
2881 : !> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2882 : !> \param t3c_sub ...
2883 : !> \param t3c_main ...
2884 : !> \param ngroups ...
2885 : !> \param para_env ...
2886 : !> \param subgroup_dest ...
2887 : !> \param iatom_to_subgroup ...
2888 : !> \param dim_at ...
2889 : !> \param idx_to_at ...
2890 : ! **************************************************************************************************
2891 17098 : SUBROUTINE copy_3c_to_subgroup(t3c_sub, t3c_main, ngroups, para_env, subgroup_dest, &
2892 17098 : iatom_to_subgroup, dim_at, idx_to_at)
2893 : TYPE(dbt_type), INTENT(INOUT) :: t3c_sub, t3c_main
2894 : INTEGER, INTENT(IN) :: ngroups
2895 : TYPE(mp_para_env_type), POINTER :: para_env
2896 : INTEGER, DIMENSION(:, :, :, :), INTENT(IN) :: subgroup_dest
2897 : TYPE(cp_1d_logical_p_type), DIMENSION(:), &
2898 : INTENT(INOUT), OPTIONAL :: iatom_to_subgroup
2899 : INTEGER, INTENT(IN), OPTIONAL :: dim_at
2900 : INTEGER, DIMENSION(:), OPTIONAL :: idx_to_at
2901 :
2902 : INTEGER :: batch_size, i, i_batch, i_msg, iatom, &
2903 : iblk, igroup, ir, is, isbuff, jblk, &
2904 : kblk, n_batch, nocc, tag
2905 17098 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2, bsizes3
2906 17098 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: block_source
2907 : INTEGER, DIMENSION(3) :: ind, nblks
2908 : LOGICAL :: filter_at, found
2909 17098 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
2910 17098 : TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2911 : TYPE(dbt_iterator_type) :: iter
2912 17098 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2913 :
2914 : !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2915 : ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2916 : ! to have blocks pre-reserved though
2917 :
2918 17098 : CALL dbt_get_info(t3c_main, nblks_total=nblks)
2919 :
2920 : !in some cases, only copy a fraction of the 3c tensor to a given subgroup (corresponding to some atoms)
2921 17098 : filter_at = .FALSE.
2922 17098 : IF (PRESENT(iatom_to_subgroup) .AND. PRESENT(dim_at) .AND. PRESENT(idx_to_at)) THEN
2923 9992 : filter_at = .TRUE.
2924 9992 : CPASSERT(nblks(dim_at) == SIZE(idx_to_at))
2925 : END IF
2926 :
2927 : !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2928 85490 : ALLOCATE (block_source(nblks(1), nblks(2), nblks(3)))
2929 1280102 : block_source = -1
2930 17098 : nocc = 0
2931 17098 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t3c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2932 : CALL dbt_iterator_start(iter, t3c_main)
2933 : DO WHILE (dbt_iterator_blocks_left(iter))
2934 : CALL dbt_iterator_next_block(iter, ind)
2935 : CALL dbt_get_block(t3c_main, ind, blk, found)
2936 : IF (.NOT. found) CYCLE
2937 :
2938 : block_source(ind(1), ind(2), ind(3)) = para_env%mepos
2939 : !$OMP ATOMIC
2940 : nocc = nocc + 1
2941 : DEALLOCATE (blk)
2942 : END DO
2943 : CALL dbt_iterator_stop(iter)
2944 : !$OMP END PARALLEL
2945 :
2946 17098 : CALL para_env%sum(nocc)
2947 17098 : CALL para_env%sum(block_source)
2948 1280102 : block_source = block_source + para_env%num_pe - 1
2949 17098 : IF (nocc == 0) RETURN
2950 :
2951 119686 : ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)), bsizes3(nblks(3)))
2952 17098 : CALL dbt_get_info(t3c_main, blk_size_1=bsizes1, blk_size_2=bsizes2, blk_size_3=bsizes3)
2953 :
2954 : !We go by batches, which cannot be larger than the maximum MPI tag value
2955 17098 : batch_size = MIN(para_env%get_tag_ub(), 128000, nocc*ngroups)
2956 17098 : n_batch = (nocc*ngroups)/batch_size
2957 17098 : IF (MODULO(nocc*ngroups, batch_size) /= 0) n_batch = n_batch + 1
2958 :
2959 34196 : DO i_batch = 1, n_batch
2960 : !Loop over groups, blocks and send/receive
2961 846736 : ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2962 846736 : ALLOCATE (send_req(batch_size), recv_req(batch_size))
2963 : ir = 0
2964 : is = 0
2965 : i_msg = 0
2966 : isbuff = 0
2967 51294 : DO kblk = 1, nblks(3)
2968 334902 : DO jblk = 1, nblks(2)
2969 1263004 : DO iblk = 1, nblks(1)
2970 945200 : IF (block_source(iblk, jblk, kblk) == -1) CYCLE
2971 :
2972 194586 : found = .FALSE.
2973 194586 : IF (para_env%mepos == block_source(iblk, jblk, kblk)) THEN
2974 389172 : CALL dbt_get_block(t3c_main, [iblk, jblk, kblk], blk, found)
2975 97293 : IF (found) THEN
2976 97293 : isbuff = isbuff + 1
2977 486465 : ALLOCATE (send_buff(isbuff)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
2978 : END IF
2979 : END IF
2980 :
2981 583758 : DO igroup = 0, ngroups - 1
2982 :
2983 389172 : i_msg = i_msg + 1
2984 389172 : IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) CYCLE
2985 :
2986 : !a unique tag per block, within this batch
2987 389172 : tag = i_msg - (i_batch - 1)*batch_size
2988 :
2989 389172 : IF (filter_at) THEN
2990 1133584 : ind(:) = [iblk, jblk, kblk]
2991 283396 : iatom = idx_to_at(ind(dim_at))
2992 283396 : IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) CYCLE
2993 : END IF
2994 :
2995 : !If blocks live on same proc, simply copy. Else MPI send/recv
2996 442060 : IF (block_source(iblk, jblk, kblk) == subgroup_dest(iblk, jblk, kblk, igroup + 1)) THEN
2997 531232 : IF (found) CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], SHAPE(blk), blk)
2998 : ELSE
2999 114666 : IF (para_env%mepos == block_source(iblk, jblk, kblk) .AND. found) THEN
3000 134557620 : send_buff(isbuff)%array(:, :, :) = blk(:, :, :)
3001 57333 : is = is + 1
3002 : CALL para_env%isend(msgin=send_buff(isbuff)%array, &
3003 : dest=subgroup_dest(iblk, jblk, kblk, igroup + 1), &
3004 57333 : request=send_req(is), tag=tag)
3005 : END IF
3006 :
3007 114666 : IF (para_env%mepos == subgroup_dest(iblk, jblk, kblk, igroup + 1)) THEN
3008 286665 : ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
3009 57333 : ir = ir + 1
3010 : CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk, kblk), &
3011 57333 : request=recv_req(ir), tag=tag)
3012 : END IF
3013 : END IF
3014 : END DO !igroup
3015 :
3016 478194 : IF (found) DEALLOCATE (blk)
3017 : END DO
3018 : END DO
3019 : END DO
3020 :
3021 : !Finally copy the data from the buffer to the sub-tensor
3022 : i_msg = 0
3023 : ir = 0
3024 51294 : DO kblk = 1, nblks(3)
3025 334902 : DO jblk = 1, nblks(2)
3026 1263004 : DO iblk = 1, nblks(1)
3027 3119208 : DO igroup = 0, ngroups - 1
3028 1890400 : IF (block_source(iblk, jblk, kblk) == -1) CYCLE
3029 :
3030 389172 : i_msg = i_msg + 1
3031 389172 : IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) CYCLE
3032 :
3033 : !a unique tag per block, within this batch
3034 389172 : tag = i_msg - (i_batch - 1)*batch_size
3035 :
3036 389172 : IF (filter_at) THEN
3037 1133584 : ind(:) = [iblk, jblk, kblk]
3038 283396 : iatom = idx_to_at(ind(dim_at))
3039 283396 : IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) CYCLE
3040 : END IF
3041 :
3042 247474 : IF (para_env%mepos == subgroup_dest(iblk, jblk, kblk, igroup + 1) .AND. &
3043 945200 : block_source(iblk, jblk, kblk) /= subgroup_dest(iblk, jblk, kblk, igroup + 1)) THEN
3044 :
3045 57333 : ir = ir + 1
3046 57333 : CALL mp_waitall(recv_req(ir:ir))
3047 401331 : CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], SHAPE(recv_buff(tag)%array), recv_buff(tag)%array)
3048 : END IF
3049 : END DO
3050 : END DO
3051 : END DO
3052 : END DO
3053 :
3054 : !clean-up
3055 17098 : CALL mp_waitall(send_req(1:is))
3056 406270 : DO i = 1, batch_size
3057 389172 : IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
3058 406270 : IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
3059 : END DO
3060 34196 : DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
3061 : END DO !i_batch
3062 17098 : CALL dbt_finalize(t3c_sub)
3063 :
3064 34196 : END SUBROUTINE copy_3c_to_subgroup
3065 :
3066 : ! **************************************************************************************************
3067 : !> \brief A routine that gather the pieces of the KS matrix accross the subgroup and puts it in the
3068 : !> main group. Each b_img, iatom, jatom tuple is one a single CPU
3069 : !> \param ks_t ...
3070 : !> \param ks_t_sub ...
3071 : !> \param group_size ...
3072 : !> \param sparsity_pattern ...
3073 : !> \param para_env ...
3074 : !> \param ri_data ...
3075 : ! **************************************************************************************************
3076 274 : SUBROUTINE gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
3077 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t, ks_t_sub
3078 : INTEGER, INTENT(IN) :: group_size
3079 : INTEGER, DIMENSION(:, :, :), INTENT(IN) :: sparsity_pattern
3080 : TYPE(mp_para_env_type), POINTER :: para_env
3081 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3082 :
3083 : CHARACTER(len=*), PARAMETER :: routineN = 'gather_ks_matrix'
3084 :
3085 : INTEGER :: b_img, dest, handle, i, i_spin, iatom, &
3086 : igroup, ir, is, jatom, n_mess, natom, &
3087 : nimg, nspins, source, tag
3088 : LOGICAL :: found
3089 274 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
3090 274 : TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
3091 274 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
3092 :
3093 274 : CALL timeset(routineN, handle)
3094 :
3095 274 : nimg = SIZE(sparsity_pattern, 3)
3096 274 : natom = SIZE(sparsity_pattern, 2)
3097 274 : nspins = SIZE(ks_t, 1)
3098 :
3099 8162 : DO b_img = 1, nimg
3100 : n_mess = 0
3101 17342 : DO i_spin = 1, nspins
3102 36250 : DO jatom = 1, natom
3103 66178 : DO iatom = 1, natom
3104 56724 : IF (sparsity_pattern(iatom, jatom, b_img) > -1) n_mess = n_mess + 1
3105 : END DO
3106 : END DO
3107 : END DO
3108 :
3109 57572 : ALLOCATE (send_buff(n_mess), recv_buff(n_mess))
3110 65460 : ALLOCATE (send_req(n_mess), recv_req(n_mess))
3111 7888 : ir = 0
3112 7888 : is = 0
3113 7888 : n_mess = 0
3114 7888 : tag = 0
3115 :
3116 17342 : DO i_spin = 1, nspins
3117 36250 : DO jatom = 1, natom
3118 66178 : DO iatom = 1, natom
3119 37816 : IF (sparsity_pattern(iatom, jatom, b_img) < 0) CYCLE
3120 12710 : n_mess = n_mess + 1
3121 12710 : tag = tag + 1
3122 :
3123 : !sending the message
3124 38130 : CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3125 38130 : CALL dbt_get_stored_coordinates(ks_t_sub(i_spin, b_img), [iatom, jatom], source) !source within sub
3126 12710 : igroup = sparsity_pattern(iatom, jatom, b_img)
3127 12710 : source = source + igroup*group_size
3128 12710 : IF (para_env%mepos == source) THEN
3129 19065 : CALL dbt_get_block(ks_t_sub(i_spin, b_img), [iatom, jatom], blk, found)
3130 6355 : IF (source == dest) THEN
3131 4133 : IF (found) CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], SHAPE(blk), blk)
3132 : ELSE
3133 20872 : ALLOCATE (send_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3134 328700 : send_buff(n_mess)%array(:, :) = 0.0_dp
3135 5218 : IF (found) THEN
3136 230014 : send_buff(n_mess)%array(:, :) = blk(:, :)
3137 : END IF
3138 5218 : is = is + 1
3139 : CALL para_env%isend(msgin=send_buff(n_mess)%array, dest=dest, &
3140 5218 : request=send_req(is), tag=tag)
3141 : END IF
3142 6355 : DEALLOCATE (blk)
3143 : END IF
3144 :
3145 : !receiving the message
3146 31618 : IF (para_env%mepos == dest .AND. source /= dest) THEN
3147 20872 : ALLOCATE (recv_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3148 5218 : ir = ir + 1
3149 : CALL para_env%irecv(msgout=recv_buff(n_mess)%array, source=source, &
3150 5218 : request=recv_req(ir), tag=tag)
3151 : END IF
3152 : END DO !iatom
3153 : END DO !jatom
3154 : END DO !ispin
3155 :
3156 7888 : CALL mp_waitall(send_req(1:is))
3157 7888 : CALL mp_waitall(recv_req(1:ir))
3158 :
3159 : !Copy the messages received into the KS matrix
3160 7888 : n_mess = 0
3161 17342 : DO i_spin = 1, nspins
3162 36250 : DO jatom = 1, natom
3163 66178 : DO iatom = 1, natom
3164 37816 : IF (sparsity_pattern(iatom, jatom, b_img) < 0) CYCLE
3165 12710 : n_mess = n_mess + 1
3166 :
3167 38130 : CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3168 31618 : IF (para_env%mepos == dest) THEN
3169 6355 : IF (.NOT. ASSOCIATED(recv_buff(n_mess)%array)) CYCLE
3170 20872 : ALLOCATE (blk(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3171 328700 : blk(:, :) = recv_buff(n_mess)%array(:, :)
3172 26090 : CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], SHAPE(blk), blk)
3173 5218 : DEALLOCATE (blk)
3174 : END IF
3175 : END DO
3176 : END DO
3177 : END DO
3178 :
3179 : !clean-up
3180 20598 : DO i = 1, n_mess
3181 12710 : IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
3182 20598 : IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
3183 : END DO
3184 8162 : DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
3185 : END DO !b_img
3186 :
3187 274 : CALL timestop(handle)
3188 :
3189 274 : END SUBROUTINE gather_ks_matrix
3190 :
3191 : ! **************************************************************************************************
3192 : !> \brief copy all required 2c tensors from the main MPI group to the subgroups
3193 : !> \param mat_2c_pot ...
3194 : !> \param t_2c_work ...
3195 : !> \param t_2c_ao_tmp ...
3196 : !> \param ks_t_split ...
3197 : !> \param ks_t_sub ...
3198 : !> \param group_size ...
3199 : !> \param ngroups ...
3200 : !> \param para_env ...
3201 : !> \param para_env_sub ...
3202 : !> \param ri_data ...
3203 : ! **************************************************************************************************
3204 274 : SUBROUTINE get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
3205 : group_size, ngroups, para_env, para_env_sub, ri_data)
3206 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3207 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work, t_2c_ao_tmp, ks_t_split
3208 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t_sub
3209 : INTEGER, INTENT(IN) :: group_size, ngroups
3210 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3211 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3212 :
3213 : CHARACTER(len=*), PARAMETER :: routineN = 'get_subgroup_2c_tensors'
3214 :
3215 : INTEGER :: handle, i, i_img, i_RI, i_spin, iproc, &
3216 : j, natom, nblks, nimg, nspins
3217 : INTEGER(int_8) :: nze
3218 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, &
3219 274 : dist1, dist2
3220 : INTEGER, DIMENSION(2) :: pdims_2d
3221 548 : INTEGER, DIMENSION(:), POINTER :: col_dist, RI_blk_size, row_dist
3222 274 : INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3223 : REAL(dp) :: occ
3224 : TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3225 822 : TYPE(dbt_pgrid_type) :: pgrid_2d
3226 3562 : TYPE(dbt_type) :: work, work_sub
3227 :
3228 274 : CALL timeset(routineN, handle)
3229 :
3230 : !Create the 2d pgrid
3231 274 : pdims_2d = 0
3232 274 : CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3233 :
3234 274 : natom = SIZE(ri_data%bsizes_RI)
3235 274 : nblks = SIZE(ri_data%bsizes_RI_split)
3236 822 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*natom))
3237 822 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks))
3238 1806 : DO i_RI = 1, ri_data%ncell_RI
3239 4596 : bsizes_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
3240 8632 : bsizes_RI_ext_split((i_RI - 1)*nblks + 1:i_RI*nblks) = ri_data%bsizes_RI_split(:)
3241 : END DO
3242 :
3243 : !nRI x nRI 2c tensors
3244 : CALL create_2c_tensor(t_2c_work(1), dist1, dist2, pgrid_2d, &
3245 : bsizes_RI_ext, bsizes_RI_ext, &
3246 : name="(RI | RI)")
3247 274 : DEALLOCATE (dist1, dist2)
3248 :
3249 : CALL create_2c_tensor(t_2c_work(2), dist1, dist2, pgrid_2d, &
3250 : bsizes_RI_ext_split, bsizes_RI_ext_split, &
3251 274 : name="(RI | RI)")
3252 274 : DEALLOCATE (dist1, dist2)
3253 :
3254 : !the AO based tensors
3255 : CALL create_2c_tensor(ks_t_split(1), dist1, dist2, pgrid_2d, &
3256 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3257 : name="(AO | AO)")
3258 274 : DEALLOCATE (dist1, dist2)
3259 274 : CALL dbt_create(ks_t_split(1), ks_t_split(2))
3260 :
3261 : CALL create_2c_tensor(t_2c_ao_tmp(1), dist1, dist2, pgrid_2d, &
3262 : ri_data%bsizes_AO, ri_data%bsizes_AO, &
3263 : name="(AO | AO)")
3264 274 : DEALLOCATE (dist1, dist2)
3265 :
3266 274 : nspins = SIZE(ks_t_sub, 1)
3267 274 : nimg = SIZE(ks_t_sub, 2)
3268 8162 : DO i_img = 1, nimg
3269 17616 : DO i_spin = 1, nspins
3270 17342 : CALL dbt_create(t_2c_ao_tmp(1), ks_t_sub(i_spin, i_img))
3271 : END DO
3272 : END DO
3273 :
3274 : !Finally the HFX potential matrices
3275 : !For now, we do a convoluted things where we go to tensors first, then back to matrices.
3276 : CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3277 : ri_data%bsizes_RI, ri_data%bsizes_RI, &
3278 : name="(RI | RI)")
3279 274 : CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3280 :
3281 1096 : ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3282 274 : iproc = 0
3283 548 : DO i = 0, pdims_2d(1) - 1
3284 822 : DO j = 0, pdims_2d(2) - 1
3285 274 : dbcsr_pgrid(i, j) = iproc
3286 548 : iproc = iproc + 1
3287 : END DO
3288 : END DO
3289 :
3290 : !We need to have the same exact 2d block dist as the tensors
3291 1096 : ALLOCATE (col_dist(natom), row_dist(natom))
3292 822 : row_dist(:) = dist1(:)
3293 822 : col_dist(:) = dist2(:)
3294 :
3295 548 : ALLOCATE (RI_blk_size(natom))
3296 822 : RI_blk_size(:) = ri_data%bsizes_RI(:)
3297 :
3298 : CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3299 274 : row_dist=row_dist, col_dist=col_dist)
3300 : CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3301 274 : row_blk_size=RI_blk_size, col_blk_size=RI_blk_size)
3302 :
3303 8162 : DO i_img = 1, nimg
3304 7888 : IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3305 7888 : CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3306 7888 : CALL get_tensor_occupancy(work, nze, occ)
3307 7888 : IF (nze == 0) CYCLE
3308 :
3309 5656 : CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3310 5656 : CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3311 5656 : CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3312 13818 : CALL dbt_clear(work_sub)
3313 : END DO
3314 :
3315 274 : CALL dbt_destroy(work)
3316 274 : CALL dbt_destroy(work_sub)
3317 274 : CALL dbt_pgrid_destroy(pgrid_2d)
3318 274 : CALL dbcsr_distribution_release(dbcsr_dist_sub)
3319 274 : DEALLOCATE (col_dist, row_dist, RI_blk_size, dbcsr_pgrid)
3320 274 : CALL timestop(handle)
3321 :
3322 2466 : END SUBROUTINE get_subgroup_2c_tensors
3323 :
3324 : ! **************************************************************************************************
3325 : !> \brief copy all required 3c tensors from the main MPI group to the subgroups
3326 : !> \param t_3c_int ...
3327 : !> \param t_3c_work_2 ...
3328 : !> \param t_3c_work_3 ...
3329 : !> \param t_3c_apc ...
3330 : !> \param t_3c_apc_sub ...
3331 : !> \param group_size ...
3332 : !> \param ngroups ...
3333 : !> \param para_env ...
3334 : !> \param para_env_sub ...
3335 : !> \param ri_data ...
3336 : ! **************************************************************************************************
3337 274 : SUBROUTINE get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
3338 : group_size, ngroups, para_env, para_env_sub, ri_data)
3339 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_int, t_3c_work_2, t_3c_work_3
3340 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, t_3c_apc_sub
3341 : INTEGER, INTENT(IN) :: group_size, ngroups
3342 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3343 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3344 :
3345 : CHARACTER(len=*), PARAMETER :: routineN = 'get_subgroup_3c_tensors'
3346 :
3347 : INTEGER :: batch_size, bo(2), handle, handle2, &
3348 : i_blk, i_img, i_RI, i_spin, ib, natom, &
3349 : nblks_AO, nblks_RI, nimg, nspins
3350 : INTEGER(int_8) :: nze
3351 274 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, &
3352 274 : bsizes_stack, bsizes_tmp, dist1, &
3353 274 : dist2, dist3, dist_stack, idx_to_at
3354 274 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :) :: subgroup_dest
3355 : INTEGER, DIMENSION(3) :: pdims
3356 : REAL(dp) :: occ
3357 2466 : TYPE(dbt_distribution_type) :: t_dist
3358 822 : TYPE(dbt_pgrid_type) :: pgrid
3359 6850 : TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3360 :
3361 274 : CALL timeset(routineN, handle)
3362 :
3363 274 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
3364 822 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks_RI))
3365 1806 : DO i_RI = 1, ri_data%ncell_RI
3366 8632 : bsizes_RI_ext_split((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI_split(:)
3367 : END DO
3368 :
3369 : !Preparing larger block sizes for efficient communication (less, bigger messages)
3370 274 : natom = SIZE(ri_data%bsizes_RI)
3371 274 : nblks_RI = natom
3372 822 : ALLOCATE (bsizes_tmp(nblks_RI))
3373 822 : DO i_blk = 1, nblks_RI
3374 548 : bo = get_limit(natom, nblks_RI, i_blk - 1)
3375 1370 : bsizes_tmp(i_blk) = SUM(ri_data%bsizes_RI(bo(1):bo(2)))
3376 : END DO
3377 822 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*nblks_RI))
3378 1806 : DO i_RI = 1, ri_data%ncell_RI
3379 4870 : bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = bsizes_tmp(:)
3380 : END DO
3381 :
3382 274 : batch_size = ri_data%kp_stack_size
3383 274 : nblks_AO = SIZE(ri_data%bsizes_AO_split)
3384 822 : ALLOCATE (bsizes_stack(batch_size*nblks_AO))
3385 5586 : DO ib = 1, batch_size
3386 22898 : bsizes_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = ri_data%bsizes_AO_split(:)
3387 : END DO
3388 :
3389 : !Create the pgrid for the configuration correspoinding to ri_data%t_3c_int_ctr_3
3390 274 : natom = SIZE(ri_data%bsizes_RI)
3391 274 : pdims = 0
3392 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3393 1096 : tensor_dims=[SIZE(bsizes_RI_ext_split), 1, batch_size*SIZE(ri_data%bsizes_AO_split)])
3394 :
3395 : !Create all required 3c tensors in that configuration
3396 : CALL create_3c_tensor(t_3c_int(1), dist1, dist2, dist3, &
3397 : pgrid, bsizes_RI_ext_split, ri_data%bsizes_AO_split, &
3398 274 : ri_data%bsizes_AO_split, [1], [2, 3], name="(RI | AO AO)")
3399 274 : nimg = SIZE(t_3c_int)
3400 7888 : DO i_img = 2, nimg
3401 7888 : CALL dbt_create(t_3c_int(1), t_3c_int(i_img))
3402 : END DO
3403 :
3404 : !The stacked work tensors, in a distribution that matches that of t_3c_int
3405 548 : ALLOCATE (dist_stack(batch_size*nblks_AO))
3406 5586 : DO ib = 1, batch_size
3407 22898 : dist_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = dist3(:)
3408 : END DO
3409 :
3410 274 : CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3411 : CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3412 274 : bsizes_RI_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3413 274 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3414 274 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3415 274 : CALL dbt_distribution_destroy(t_dist)
3416 274 : DEALLOCATE (dist1, dist2, dist3, dist_stack)
3417 :
3418 : !For more efficient communication, we use intermediate tensors with larger block size
3419 : CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3420 : pgrid, bsizes_RI_ext, ri_data%bsizes_AO, &
3421 274 : ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3422 274 : DEALLOCATE (dist1, dist2, dist3)
3423 :
3424 : CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3425 : ri_data%pgrid, bsizes_RI_ext, ri_data%bsizes_AO, &
3426 274 : ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3427 274 : DEALLOCATE (dist1, dist2, dist3)
3428 :
3429 : CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3430 274 : group_size, ngroups, para_env)
3431 :
3432 : !Finally copy the integrals into the subgroups (if not there already)
3433 274 : CALL timeset(routineN//"_ints", handle2)
3434 274 : IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
3435 5720 : DO i_img = 1, nimg
3436 5720 : CALL dbt_copy(ri_data%kp_t_3c_int(i_img), t_3c_int(i_img), move_data=.TRUE.)
3437 : END DO
3438 : ELSE
3439 3322 : ALLOCATE (ri_data%kp_t_3c_int(nimg))
3440 2442 : DO i_img = 1, nimg
3441 2354 : CALL dbt_create(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img))
3442 2354 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, i_img), nze, occ)
3443 2354 : IF (nze == 0) CYCLE
3444 2128 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, i_img), work_atom_block, order=[2, 1, 3])
3445 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3446 2128 : ngroups, para_env, subgroup_dest)
3447 2128 : CALL dbt_copy(work_atom_block_sub, t_3c_int(i_img), move_data=.TRUE.)
3448 4570 : CALL dbt_filter(t_3c_int(i_img), ri_data%filter_eps)
3449 : END DO
3450 : END IF
3451 274 : CALL timestop(handle2)
3452 274 : CALL dbt_pgrid_destroy(pgrid)
3453 274 : CALL dbt_destroy(work_atom_block)
3454 274 : CALL dbt_destroy(work_atom_block_sub)
3455 274 : DEALLOCATE (subgroup_dest)
3456 :
3457 : !Do the same for the t_3c_ctr_2 configuration
3458 274 : pdims = 0
3459 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3460 1096 : tensor_dims=[1, SIZE(bsizes_RI_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3461 :
3462 : !For more efficient communication, we use intermediate tensors with larger block size
3463 : CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3464 : pgrid, ri_data%bsizes_AO, bsizes_RI_ext, &
3465 274 : ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3466 274 : DEALLOCATE (dist1, dist2, dist3)
3467 :
3468 : CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3469 : ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_RI_ext, &
3470 274 : ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3471 274 : DEALLOCATE (dist1, dist2, dist3)
3472 :
3473 : CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3474 274 : group_size, ngroups, para_env)
3475 :
3476 : !template for t_3c_apc_sub
3477 : CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3478 : pgrid, ri_data%bsizes_AO_split, bsizes_RI_ext_split, &
3479 274 : ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3480 :
3481 : !create t_3c_work_2 tensors in a distribution that matches the above
3482 548 : ALLOCATE (dist_stack(batch_size*nblks_AO))
3483 5586 : DO ib = 1, batch_size
3484 22898 : dist_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = dist3(:)
3485 : END DO
3486 :
3487 274 : CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3488 : CALL dbt_create(t_3c_work_2(1), "work_2_stack", t_dist, [1], [2, 3], &
3489 274 : ri_data%bsizes_AO_split, bsizes_RI_ext_split, bsizes_stack)
3490 274 : CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3491 274 : CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3492 274 : CALL dbt_distribution_destroy(t_dist)
3493 274 : DEALLOCATE (dist1, dist2, dist3, dist_stack)
3494 :
3495 : !Finally copy data from t_3c_apc to the subgroups
3496 822 : ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3497 274 : CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3498 274 : nspins = SIZE(t_3c_apc, 1)
3499 274 : CALL timeset(routineN//"_apc", handle2)
3500 8162 : DO i_img = 1, nimg
3501 17342 : DO i_spin = 1, nspins
3502 9454 : CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3503 9454 : CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3504 9454 : IF (nze == 0) CYCLE
3505 8438 : CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.TRUE.)
3506 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, ngroups, para_env, &
3507 8438 : subgroup_dest, ri_data%iatom_to_subgroup, 1, idx_to_at)
3508 8438 : CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.TRUE.)
3509 25780 : CALL dbt_filter(t_3c_apc_sub(i_spin, i_img), ri_data%filter_eps)
3510 : END DO
3511 17616 : DO i_spin = 1, nspins
3512 17342 : CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3513 : END DO
3514 : END DO
3515 274 : CALL timestop(handle2)
3516 274 : CALL dbt_pgrid_destroy(pgrid)
3517 274 : CALL dbt_destroy(tmp)
3518 274 : CALL dbt_destroy(work_atom_block)
3519 274 : CALL dbt_destroy(work_atom_block_sub)
3520 :
3521 274 : CALL timestop(handle)
3522 :
3523 1096 : END SUBROUTINE get_subgroup_3c_tensors
3524 :
3525 : ! **************************************************************************************************
3526 : !> \brief copy all required 2c force tensors from the main MPI group to the subgroups
3527 : !> \param t_2c_inv ...
3528 : !> \param t_2c_bint ...
3529 : !> \param t_2c_metric ...
3530 : !> \param mat_2c_pot ...
3531 : !> \param t_2c_work ...
3532 : !> \param rho_ao_t ...
3533 : !> \param rho_ao_t_sub ...
3534 : !> \param t_2c_der_metric ...
3535 : !> \param t_2c_der_metric_sub ...
3536 : !> \param mat_der_pot ...
3537 : !> \param mat_der_pot_sub ...
3538 : !> \param group_size ...
3539 : !> \param ngroups ...
3540 : !> \param para_env ...
3541 : !> \param para_env_sub ...
3542 : !> \param ri_data ...
3543 : !> \note Main MPI group tensors are deleted within this routine, for memory optimization
3544 : ! **************************************************************************************************
3545 100 : SUBROUTINE get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
3546 50 : rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
3547 50 : mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
3548 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_inv, t_2c_bint, t_2c_metric
3549 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3550 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work
3551 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
3552 : t_2c_der_metric_sub
3553 : TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot, mat_der_pot_sub
3554 : INTEGER, INTENT(IN) :: group_size, ngroups
3555 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3556 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3557 :
3558 : CHARACTER(len=*), PARAMETER :: routineN = 'get_subgroup_2c_derivs'
3559 :
3560 : INTEGER :: handle, i, i_img, i_RI, i_spin, i_xyz, &
3561 : iatom, iproc, j, natom, nblks, nimg, &
3562 : nspins
3563 : INTEGER(int_8) :: nze
3564 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, &
3565 50 : dist1, dist2
3566 : INTEGER, DIMENSION(2) :: pdims_2d
3567 100 : INTEGER, DIMENSION(:), POINTER :: col_dist, RI_blk_size, row_dist
3568 50 : INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3569 : REAL(dp) :: occ
3570 : TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3571 150 : TYPE(dbt_pgrid_type) :: pgrid_2d
3572 650 : TYPE(dbt_type) :: work, work_sub
3573 :
3574 50 : CALL timeset(routineN, handle)
3575 :
3576 : !Note: a fair portion of this routine is copied from the energy version of it
3577 : !Create the 2d pgrid
3578 50 : pdims_2d = 0
3579 50 : CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3580 :
3581 50 : natom = SIZE(ri_data%bsizes_RI)
3582 50 : nblks = SIZE(ri_data%bsizes_RI_split)
3583 150 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*natom))
3584 150 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks))
3585 366 : DO i_RI = 1, ri_data%ncell_RI
3586 948 : bsizes_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
3587 1802 : bsizes_RI_ext_split((i_RI - 1)*nblks + 1:i_RI*nblks) = ri_data%bsizes_RI_split(:)
3588 : END DO
3589 :
3590 : !nRI x nRI 2c tensors
3591 : CALL create_2c_tensor(t_2c_inv(1), dist1, dist2, pgrid_2d, &
3592 : bsizes_RI_ext, bsizes_RI_ext, &
3593 : name="(RI | RI)")
3594 50 : DEALLOCATE (dist1, dist2)
3595 :
3596 50 : CALL dbt_create(t_2c_inv(1), t_2c_bint(1))
3597 50 : CALL dbt_create(t_2c_inv(1), t_2c_metric(1))
3598 100 : DO iatom = 2, natom
3599 50 : CALL dbt_create(t_2c_inv(1), t_2c_inv(iatom))
3600 50 : CALL dbt_create(t_2c_inv(1), t_2c_bint(iatom))
3601 100 : CALL dbt_create(t_2c_inv(1), t_2c_metric(iatom))
3602 : END DO
3603 50 : CALL dbt_create(t_2c_inv(1), t_2c_work(1))
3604 50 : CALL dbt_create(t_2c_inv(1), t_2c_work(2))
3605 50 : CALL dbt_create(t_2c_inv(1), t_2c_work(3))
3606 50 : CALL dbt_create(t_2c_inv(1), t_2c_work(4))
3607 :
3608 : CALL create_2c_tensor(t_2c_work(5), dist1, dist2, pgrid_2d, &
3609 : bsizes_RI_ext_split, bsizes_RI_ext_split, &
3610 50 : name="(RI | RI)")
3611 50 : DEALLOCATE (dist1, dist2)
3612 :
3613 : !copy the data from the main group.
3614 150 : DO iatom = 1, natom
3615 100 : CALL copy_2c_to_subgroup(t_2c_inv(iatom), ri_data%t_2c_inv(1, iatom), group_size, ngroups, para_env)
3616 100 : CALL copy_2c_to_subgroup(t_2c_bint(iatom), ri_data%t_2c_int(1, iatom), group_size, ngroups, para_env)
3617 150 : CALL copy_2c_to_subgroup(t_2c_metric(iatom), ri_data%t_2c_pot(1, iatom), group_size, ngroups, para_env)
3618 : END DO
3619 :
3620 : !This includes the derivatives of the RI metric, for which there is one per atom
3621 200 : DO i_xyz = 1, 3
3622 500 : DO iatom = 1, natom
3623 300 : CALL dbt_create(t_2c_inv(1), t_2c_der_metric_sub(iatom, i_xyz))
3624 : CALL copy_2c_to_subgroup(t_2c_der_metric_sub(iatom, i_xyz), t_2c_der_metric(iatom, i_xyz), &
3625 300 : group_size, ngroups, para_env)
3626 450 : CALL dbt_destroy(t_2c_der_metric(iatom, i_xyz))
3627 : END DO
3628 : END DO
3629 :
3630 : !AO x AO 2c tensors
3631 : CALL create_2c_tensor(rho_ao_t_sub(1, 1), dist1, dist2, pgrid_2d, &
3632 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3633 : name="(AO | AO)")
3634 50 : DEALLOCATE (dist1, dist2)
3635 50 : nspins = SIZE(rho_ao_t, 1)
3636 50 : nimg = SIZE(rho_ao_t, 2)
3637 :
3638 1472 : DO i_img = 1, nimg
3639 3074 : DO i_spin = 1, nspins
3640 1602 : IF (.NOT. (i_img == 1 .AND. i_spin == 1)) &
3641 1552 : CALL dbt_create(rho_ao_t_sub(1, 1), rho_ao_t_sub(i_spin, i_img))
3642 : CALL copy_2c_to_subgroup(rho_ao_t_sub(i_spin, i_img), rho_ao_t(i_spin, i_img), &
3643 1602 : group_size, ngroups, para_env)
3644 3024 : CALL dbt_destroy(rho_ao_t(i_spin, i_img))
3645 : END DO
3646 : END DO
3647 :
3648 : !The RIxRI matrices, going through tensors
3649 : CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3650 : ri_data%bsizes_RI, ri_data%bsizes_RI, &
3651 : name="(RI | RI)")
3652 50 : CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3653 :
3654 200 : ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3655 50 : iproc = 0
3656 100 : DO i = 0, pdims_2d(1) - 1
3657 150 : DO j = 0, pdims_2d(2) - 1
3658 50 : dbcsr_pgrid(i, j) = iproc
3659 100 : iproc = iproc + 1
3660 : END DO
3661 : END DO
3662 :
3663 : !We need to have the same exact 2d block dist as the tensors
3664 200 : ALLOCATE (col_dist(natom), row_dist(natom))
3665 150 : row_dist(:) = dist1(:)
3666 150 : col_dist(:) = dist2(:)
3667 :
3668 100 : ALLOCATE (RI_blk_size(natom))
3669 150 : RI_blk_size(:) = ri_data%bsizes_RI(:)
3670 :
3671 : CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3672 50 : row_dist=row_dist, col_dist=col_dist)
3673 : CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3674 50 : row_blk_size=RI_blk_size, col_blk_size=RI_blk_size)
3675 :
3676 : !The HFX potential
3677 1472 : DO i_img = 1, nimg
3678 1422 : IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3679 1422 : CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3680 1422 : CALL get_tensor_occupancy(work, nze, occ)
3681 1422 : IF (nze == 0) CYCLE
3682 :
3683 822 : CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3684 822 : CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3685 822 : CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3686 2294 : CALL dbt_clear(work_sub)
3687 : END DO
3688 :
3689 : !The derivatives of the HFX potential
3690 200 : DO i_xyz = 1, 3
3691 4466 : DO i_img = 1, nimg
3692 4266 : CALL dbcsr_create(mat_der_pot_sub(i_img, i_xyz), template=mat_2c_pot(1))
3693 4266 : CALL dbt_copy_matrix_to_tensor(mat_der_pot(i_img, i_xyz), work)
3694 4266 : CALL dbcsr_release(mat_der_pot(i_img, i_xyz))
3695 4266 : CALL get_tensor_occupancy(work, nze, occ)
3696 4266 : IF (nze == 0) CYCLE
3697 :
3698 2458 : CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3699 2458 : CALL dbt_copy_tensor_to_matrix(work_sub, mat_der_pot_sub(i_img, i_xyz))
3700 2458 : CALL dbcsr_filter(mat_der_pot_sub(i_img, i_xyz), ri_data%filter_eps)
3701 6874 : CALL dbt_clear(work_sub)
3702 : END DO
3703 : END DO
3704 :
3705 50 : CALL dbt_destroy(work)
3706 50 : CALL dbt_destroy(work_sub)
3707 50 : CALL dbt_pgrid_destroy(pgrid_2d)
3708 50 : CALL dbcsr_distribution_release(dbcsr_dist_sub)
3709 50 : DEALLOCATE (col_dist, row_dist, RI_blk_size, dbcsr_pgrid)
3710 :
3711 50 : CALL timestop(handle)
3712 :
3713 400 : END SUBROUTINE get_subgroup_2c_derivs
3714 :
3715 : ! **************************************************************************************************
3716 : !> \brief copy all required 3c derivative tensors from the main MPI group to the subgroups
3717 : !> \param t_3c_work_2 ...
3718 : !> \param t_3c_work_3 ...
3719 : !> \param t_3c_der_AO ...
3720 : !> \param t_3c_der_AO_sub ...
3721 : !> \param t_3c_der_RI ...
3722 : !> \param t_3c_der_RI_sub ...
3723 : !> \param t_3c_apc ...
3724 : !> \param t_3c_apc_sub ...
3725 : !> \param t_3c_der_stack ...
3726 : !> \param group_size ...
3727 : !> \param ngroups ...
3728 : !> \param para_env ...
3729 : !> \param para_env_sub ...
3730 : !> \param ri_data ...
3731 : !> \note the tensor containing the derivatives in the main MPI group are deleted for memory
3732 : ! **************************************************************************************************
3733 50 : SUBROUTINE get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_AO, t_3c_der_AO_sub, &
3734 50 : t_3c_der_RI, t_3c_der_RI_sub, t_3c_apc, t_3c_apc_sub, &
3735 50 : t_3c_der_stack, group_size, ngroups, para_env, para_env_sub, &
3736 : ri_data)
3737 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_work_2, t_3c_work_3
3738 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_AO, t_3c_der_AO_sub, &
3739 : t_3c_der_RI, t_3c_der_RI_sub, &
3740 : t_3c_apc, t_3c_apc_sub
3741 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_der_stack
3742 : INTEGER, INTENT(IN) :: group_size, ngroups
3743 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3744 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3745 :
3746 : CHARACTER(len=*), PARAMETER :: routineN = 'get_subgroup_3c_derivs'
3747 :
3748 : INTEGER :: batch_size, handle, i_img, i_RI, i_spin, &
3749 : i_xyz, ib, nblks_AO, nblks_RI, nimg, &
3750 : nspins, pdims(3)
3751 : INTEGER(int_8) :: nze
3752 50 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, &
3753 50 : bsizes_stack, dist1, dist2, dist3, &
3754 50 : dist_stack, idx_to_at
3755 50 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :) :: subgroup_dest
3756 : REAL(dp) :: occ
3757 450 : TYPE(dbt_distribution_type) :: t_dist
3758 150 : TYPE(dbt_pgrid_type) :: pgrid
3759 1250 : TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3760 :
3761 50 : CALL timeset(routineN, handle)
3762 :
3763 : !We use intermediate tensors with larger block size for more optimized communication
3764 50 : nblks_RI = SIZE(ri_data%bsizes_RI)
3765 150 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*nblks_RI))
3766 366 : DO i_RI = 1, ri_data%ncell_RI
3767 998 : bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI(:)
3768 : END DO
3769 :
3770 50 : CALL dbt_get_info(ri_data%kp_t_3c_int(1), pdims=pdims)
3771 50 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3772 :
3773 : CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3774 : pgrid, bsizes_RI_ext, ri_data%bsizes_AO, &
3775 50 : ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3776 50 : DEALLOCATE (dist1, dist2, dist3)
3777 :
3778 : CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3779 : ri_data%pgrid_2, bsizes_RI_ext, ri_data%bsizes_AO, &
3780 50 : ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3781 50 : DEALLOCATE (dist1, dist2, dist3)
3782 50 : CALL dbt_pgrid_destroy(pgrid)
3783 :
3784 : CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3785 50 : group_size, ngroups, para_env)
3786 :
3787 : !We use the 3c integrals on the subgroup as template for the derivatives
3788 50 : nimg = ri_data%nimg
3789 200 : DO i_xyz = 1, 3
3790 4416 : DO i_img = 1, nimg
3791 4266 : CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_AO_sub(i_img, i_xyz))
3792 4266 : CALL get_tensor_occupancy(t_3c_der_AO(i_img, i_xyz), nze, occ)
3793 4266 : IF (nze == 0) CYCLE
3794 :
3795 2510 : CALL dbt_copy(t_3c_der_AO(i_img, i_xyz), work_atom_block, move_data=.TRUE.)
3796 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3797 2510 : ngroups, para_env, subgroup_dest)
3798 2510 : CALL dbt_copy(work_atom_block_sub, t_3c_der_AO_sub(i_img, i_xyz), move_data=.TRUE.)
3799 6926 : CALL dbt_filter(t_3c_der_AO_sub(i_img, i_xyz), ri_data%filter_eps)
3800 : END DO
3801 :
3802 4416 : DO i_img = 1, nimg
3803 4266 : CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_RI_sub(i_img, i_xyz))
3804 4266 : CALL get_tensor_occupancy(t_3c_der_RI(i_img, i_xyz), nze, occ)
3805 4266 : IF (nze == 0) CYCLE
3806 :
3807 2468 : CALL dbt_copy(t_3c_der_RI(i_img, i_xyz), work_atom_block, move_data=.TRUE.)
3808 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3809 2468 : ngroups, para_env, subgroup_dest)
3810 2468 : CALL dbt_copy(work_atom_block_sub, t_3c_der_RI_sub(i_img, i_xyz), move_data=.TRUE.)
3811 6884 : CALL dbt_filter(t_3c_der_RI_sub(i_img, i_xyz), ri_data%filter_eps)
3812 : END DO
3813 :
3814 4466 : DO i_img = 1, nimg
3815 4266 : CALL dbt_destroy(t_3c_der_RI(i_img, i_xyz))
3816 4416 : CALL dbt_destroy(t_3c_der_AO(i_img, i_xyz))
3817 : END DO
3818 : END DO
3819 50 : CALL dbt_destroy(work_atom_block_sub)
3820 50 : CALL dbt_destroy(work_atom_block)
3821 50 : DEALLOCATE (subgroup_dest)
3822 :
3823 : !Deal with t_3c_apc
3824 50 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
3825 150 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks_RI))
3826 366 : DO i_RI = 1, ri_data%ncell_RI
3827 1802 : bsizes_RI_ext_split((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI_split(:)
3828 : END DO
3829 :
3830 50 : pdims = 0
3831 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3832 200 : tensor_dims=[1, SIZE(bsizes_RI_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3833 :
3834 : CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3835 : pgrid, ri_data%bsizes_AO, bsizes_RI_ext, &
3836 50 : ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3837 50 : DEALLOCATE (dist1, dist2, dist3)
3838 :
3839 : CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3840 : ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_RI_ext, &
3841 50 : ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3842 50 : DEALLOCATE (dist1, dist2, dist3)
3843 :
3844 : CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3845 : pgrid, ri_data%bsizes_AO_split, bsizes_RI_ext_split, &
3846 50 : ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3847 50 : DEALLOCATE (dist1, dist2, dist3)
3848 :
3849 : CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3850 50 : group_size, ngroups, para_env)
3851 :
3852 150 : ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3853 50 : CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3854 50 : nspins = SIZE(t_3c_apc, 1)
3855 1472 : DO i_img = 1, nimg
3856 3024 : DO i_spin = 1, nspins
3857 1602 : CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3858 1602 : CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3859 1602 : IF (nze == 0) CYCLE
3860 1554 : CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.TRUE.)
3861 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, ngroups, para_env, &
3862 1554 : subgroup_dest, ri_data%iatom_to_subgroup, 1, idx_to_at)
3863 1554 : CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.TRUE.)
3864 4578 : CALL dbt_filter(t_3c_apc_sub(i_spin, i_img), ri_data%filter_eps)
3865 : END DO
3866 3074 : DO i_spin = 1, nspins
3867 3024 : CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3868 : END DO
3869 : END DO
3870 50 : CALL dbt_destroy(tmp)
3871 50 : CALL dbt_destroy(work_atom_block)
3872 50 : CALL dbt_destroy(work_atom_block_sub)
3873 50 : CALL dbt_pgrid_destroy(pgrid)
3874 :
3875 : !t_3c_work_3 based on structure of 3c integrals/derivs
3876 50 : batch_size = ri_data%kp_stack_size
3877 50 : nblks_AO = SIZE(ri_data%bsizes_AO_split)
3878 150 : ALLOCATE (bsizes_stack(batch_size*nblks_AO))
3879 882 : DO ib = 1, batch_size
3880 4050 : bsizes_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = ri_data%bsizes_AO_split(:)
3881 : END DO
3882 :
3883 350 : ALLOCATE (dist1(ri_data%ncell_RI*nblks_RI), dist2(nblks_AO), dist3(nblks_AO))
3884 : CALL dbt_get_info(ri_data%kp_t_3c_int(1), proc_dist_1=dist1, proc_dist_2=dist2, &
3885 50 : proc_dist_3=dist3, pdims=pdims)
3886 :
3887 150 : ALLOCATE (dist_stack(batch_size*nblks_AO))
3888 882 : DO ib = 1, batch_size
3889 4050 : dist_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = dist3(:)
3890 : END DO
3891 :
3892 50 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3893 50 : CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3894 : CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3895 50 : bsizes_RI_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3896 50 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3897 50 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3898 50 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(4))
3899 50 : CALL dbt_distribution_destroy(t_dist)
3900 50 : CALL dbt_pgrid_destroy(pgrid)
3901 50 : DEALLOCATE (dist1, dist2, dist3, dist_stack)
3902 :
3903 : !the derivatives are stacked in the same way
3904 50 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(1))
3905 50 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(2))
3906 50 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(3))
3907 50 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(4))
3908 50 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(5))
3909 50 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(6))
3910 :
3911 : !t_3c_work_2 based on structure of t_3c_apc
3912 350 : ALLOCATE (dist1(nblks_AO), dist2(ri_data%ncell_RI*nblks_RI), dist3(nblks_AO))
3913 : CALL dbt_get_info(t_3c_apc_sub(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, &
3914 50 : proc_dist_3=dist3, pdims=pdims)
3915 :
3916 150 : ALLOCATE (dist_stack(batch_size*nblks_AO))
3917 882 : DO ib = 1, batch_size
3918 4050 : dist_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = dist3(:)
3919 : END DO
3920 :
3921 50 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3922 50 : CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3923 : CALL dbt_create(t_3c_work_2(1), "work_3_stack", t_dist, [1], [2, 3], &
3924 50 : ri_data%bsizes_AO_split, bsizes_RI_ext_split, bsizes_stack)
3925 50 : CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3926 50 : CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3927 50 : CALL dbt_distribution_destroy(t_dist)
3928 50 : CALL dbt_pgrid_destroy(pgrid)
3929 50 : DEALLOCATE (dist1, dist2, dist3, dist_stack)
3930 :
3931 50 : CALL timestop(handle)
3932 :
3933 100 : END SUBROUTINE get_subgroup_3c_derivs
3934 :
3935 : ! **************************************************************************************************
3936 : !> \brief A routine that reorders the t_3c_int tensors such that all items which are fully empty
3937 : !> are bunched together. This way, we can get much more efficient screening based on NZE
3938 : !> \param t_3c_ints ...
3939 : !> \param ri_data ...
3940 : ! **************************************************************************************************
3941 88 : SUBROUTINE reorder_3c_ints(t_3c_ints, ri_data)
3942 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_ints
3943 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3944 :
3945 : CHARACTER(LEN=*), PARAMETER :: routineN = 'reorder_3c_ints'
3946 :
3947 : INTEGER :: handle, i_img, idx, idx_empty, idx_full, &
3948 : nimg
3949 : INTEGER(int_8) :: nze
3950 : REAL(dp) :: occ
3951 88 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
3952 :
3953 88 : CALL timeset(routineN, handle)
3954 :
3955 88 : nimg = ri_data%nimg
3956 3322 : ALLOCATE (t_3c_tmp(nimg))
3957 2442 : DO i_img = 1, nimg
3958 2354 : CALL dbt_create(t_3c_ints(i_img), t_3c_tmp(i_img))
3959 2442 : CALL dbt_copy(t_3c_ints(i_img), t_3c_tmp(i_img), move_data=.TRUE.)
3960 : END DO
3961 :
3962 : !Loop over the images, check if ints have NZE == 0, and put them at the start or end of the
3963 : !initial tensor array. Keep the mapping in an array
3964 264 : ALLOCATE (ri_data%idx_to_img(nimg))
3965 88 : idx_full = 0
3966 88 : idx_empty = nimg + 1
3967 :
3968 2442 : DO i_img = 1, nimg
3969 2354 : CALL get_tensor_occupancy(t_3c_tmp(i_img), nze, occ)
3970 2354 : IF (nze == 0) THEN
3971 734 : idx_empty = idx_empty - 1
3972 734 : CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_empty), move_data=.TRUE.)
3973 734 : ri_data%idx_to_img(idx_empty) = i_img
3974 : ELSE
3975 1620 : idx_full = idx_full + 1
3976 1620 : CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_full), move_data=.TRUE.)
3977 1620 : ri_data%idx_to_img(idx_full) = i_img
3978 : END IF
3979 4796 : CALL dbt_destroy(t_3c_tmp(i_img))
3980 : END DO
3981 :
3982 : !store the highest image index with non-zero integrals
3983 88 : ri_data%nimg_nze = idx_full
3984 :
3985 176 : ALLOCATE (ri_data%img_to_idx(nimg))
3986 2442 : DO idx = 1, nimg
3987 2442 : ri_data%img_to_idx(ri_data%idx_to_img(idx)) = idx
3988 : END DO
3989 :
3990 88 : CALL timestop(handle)
3991 :
3992 2530 : END SUBROUTINE reorder_3c_ints
3993 :
3994 : ! **************************************************************************************************
3995 : !> \brief A routine that reorders the 3c derivatives, the same way that the integrals are, also to
3996 : !> increase efficiency of screening
3997 : !> \param t_3c_derivs ...
3998 : !> \param ri_data ...
3999 : ! **************************************************************************************************
4000 100 : SUBROUTINE reorder_3c_derivs(t_3c_derivs, ri_data)
4001 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_derivs
4002 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4003 :
4004 : CHARACTER(LEN=*), PARAMETER :: routineN = 'reorder_3c_derivs'
4005 :
4006 : INTEGER :: handle, i_img, i_xyz, idx, nimg
4007 : INTEGER(int_8) :: nze
4008 : REAL(dp) :: occ
4009 100 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
4010 :
4011 100 : CALL timeset(routineN, handle)
4012 :
4013 100 : nimg = ri_data%nimg
4014 3944 : ALLOCATE (t_3c_tmp(nimg))
4015 2944 : DO i_img = 1, nimg
4016 2944 : CALL dbt_create(t_3c_derivs(1, 1), t_3c_tmp(i_img))
4017 : END DO
4018 :
4019 400 : DO i_xyz = 1, 3
4020 8832 : DO i_img = 1, nimg
4021 8832 : CALL dbt_copy(t_3c_derivs(i_img, i_xyz), t_3c_tmp(i_img), move_data=.TRUE.)
4022 : END DO
4023 8932 : DO i_img = 1, nimg
4024 8532 : idx = ri_data%img_to_idx(i_img)
4025 8532 : CALL dbt_copy(t_3c_tmp(i_img), t_3c_derivs(idx, i_xyz), move_data=.TRUE.)
4026 8532 : CALL get_tensor_occupancy(t_3c_derivs(idx, i_xyz), nze, occ)
4027 8832 : IF (nze > 0) ri_data%nimg_nze = MAX(idx, ri_data%nimg_nze)
4028 : END DO
4029 : END DO
4030 :
4031 2944 : DO i_img = 1, nimg
4032 2944 : CALL dbt_destroy(t_3c_tmp(i_img))
4033 : END DO
4034 :
4035 100 : CALL timestop(handle)
4036 :
4037 3044 : END SUBROUTINE reorder_3c_derivs
4038 :
4039 : ! **************************************************************************************************
4040 : !> \brief Get the sparsity pattern related to the non-symmetric AO basis overlap neighbor list
4041 : !> \param pattern ...
4042 : !> \param ri_data ...
4043 : !> \param qs_env ...
4044 : ! **************************************************************************************************
4045 324 : SUBROUTINE get_sparsity_pattern(pattern, ri_data, qs_env)
4046 : INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: pattern
4047 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4048 : TYPE(qs_environment_type), POINTER :: qs_env
4049 :
4050 : INTEGER :: iatom, j_img, jatom, mj_img, natom, nimg
4051 324 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bins
4052 324 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: tmp_pattern
4053 : INTEGER, DIMENSION(3) :: cell_j
4054 324 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
4055 324 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
4056 : TYPE(dft_control_type), POINTER :: dft_control
4057 : TYPE(kpoint_type), POINTER :: kpoints
4058 : TYPE(mp_para_env_type), POINTER :: para_env
4059 : TYPE(neighbor_list_iterator_p_type), &
4060 324 : DIMENSION(:), POINTER :: nl_iterator
4061 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4062 324 : POINTER :: nl_2c
4063 :
4064 324 : NULLIFY (nl_2c, nl_iterator, kpoints, cell_to_index, dft_control, index_to_cell, para_env)
4065 :
4066 324 : CALL get_qs_env(qs_env, kpoints=kpoints, dft_control=dft_control, para_env=para_env, natom=natom)
4067 324 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell, sab_nl=nl_2c)
4068 :
4069 324 : nimg = ri_data%nimg
4070 65494 : pattern(:, :, :) = 0
4071 :
4072 : !We use the symmetric nl for all images that have an opposite cell
4073 324 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4074 15727 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4075 15403 : CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
4076 :
4077 15403 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4078 15403 : IF (j_img > nimg .OR. j_img < 1) CYCLE
4079 :
4080 11236 : mj_img = get_opp_index(j_img, qs_env)
4081 11236 : IF (mj_img > nimg .OR. mj_img < 1) CYCLE
4082 :
4083 10725 : IF (ri_data%present_images(j_img) == 0) CYCLE
4084 :
4085 15403 : pattern(iatom, jatom, j_img) = 1
4086 : END DO
4087 324 : CALL neighbor_list_iterator_release(nl_iterator)
4088 :
4089 : !If there is no opposite cell present, then we take into account the non-symmetric nl
4090 324 : CALL get_kpoint_info(kpoints, sab_nl_nosym=nl_2c)
4091 :
4092 324 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4093 20738 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4094 20414 : CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
4095 :
4096 20414 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4097 20414 : IF (j_img > nimg .OR. j_img < 1) CYCLE
4098 :
4099 14511 : mj_img = get_opp_index(j_img, qs_env)
4100 14511 : IF (mj_img <= nimg .AND. mj_img > 0) CYCLE
4101 :
4102 529 : IF (ri_data%present_images(j_img) == 0) CYCLE
4103 :
4104 20414 : pattern(iatom, jatom, j_img) = 1
4105 : END DO
4106 324 : CALL neighbor_list_iterator_release(nl_iterator)
4107 :
4108 130664 : CALL para_env%sum(pattern)
4109 :
4110 : !If the opposite image is considered, then there is no need to compute diagonal twice
4111 9310 : DO j_img = 2, nimg
4112 27282 : DO iatom = 1, natom
4113 26958 : IF (pattern(iatom, iatom, j_img) /= 0) THEN
4114 5748 : mj_img = get_opp_index(j_img, qs_env)
4115 5748 : IF (mj_img > nimg .OR. mj_img < 1) CYCLE
4116 5748 : pattern(iatom, iatom, mj_img) = 0
4117 : END IF
4118 : END DO
4119 : END DO
4120 :
4121 : ! We want to equilibrate the sparsity pattern such that there are same amount of blocks
4122 : ! for each atom i of i,j pairs
4123 972 : ALLOCATE (bins(natom))
4124 972 : bins(:) = 0
4125 :
4126 1620 : ALLOCATE (tmp_pattern(natom, natom, nimg))
4127 65494 : tmp_pattern(:, :, :) = 0
4128 9634 : DO j_img = 1, nimg
4129 28254 : DO jatom = 1, natom
4130 65170 : DO iatom = 1, natom
4131 37240 : IF (pattern(iatom, jatom, j_img) == 0) CYCLE
4132 11808 : mj_img = get_opp_index(j_img, qs_env)
4133 :
4134 : !Should we take the i,j,b or th j,i,-b atomic block?
4135 30428 : IF (mj_img > nimg .OR. mj_img < 1) THEN
4136 : !No opposite image, no choice
4137 222 : bins(iatom) = bins(iatom) + 1
4138 222 : tmp_pattern(iatom, jatom, j_img) = 1
4139 : ELSE
4140 :
4141 11586 : IF (bins(iatom) > bins(jatom)) THEN
4142 2460 : bins(jatom) = bins(jatom) + 1
4143 2460 : tmp_pattern(jatom, iatom, mj_img) = 1
4144 : ELSE
4145 9126 : bins(iatom) = bins(iatom) + 1
4146 9126 : tmp_pattern(iatom, jatom, j_img) = 1
4147 : END IF
4148 : END IF
4149 : END DO
4150 : END DO
4151 : END DO
4152 :
4153 : ! -1 => unoccupied, 0 => occupied
4154 65494 : pattern(:, :, :) = tmp_pattern(:, :, :) - 1
4155 :
4156 648 : END SUBROUTINE get_sparsity_pattern
4157 :
4158 : ! **************************************************************************************************
4159 : !> \brief Distribute the iatom, jatom, b_img triplet over the subgroupd to spread the load
4160 : !> the group id for each triplet is passed as the value of sparsity_pattern(i, j, b),
4161 : !> with -1 being an unoccupied block
4162 : !> \param sparsity_pattern ...
4163 : !> \param ngroups ...
4164 : !> \param ri_data ...
4165 : ! **************************************************************************************************
4166 324 : SUBROUTINE get_sub_dist(sparsity_pattern, ngroups, ri_data)
4167 : INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: sparsity_pattern
4168 : INTEGER, INTENT(IN) :: ngroups
4169 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4170 :
4171 : INTEGER :: b_img, ctr, iat, iatom, igroup, jatom, &
4172 : natom, nimg, ub
4173 324 : INTEGER, ALLOCATABLE, DIMENSION(:) :: max_at_per_group
4174 : REAL(dp) :: cost
4175 324 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4176 :
4177 324 : natom = SIZE(sparsity_pattern, 2)
4178 324 : nimg = SIZE(sparsity_pattern, 3)
4179 :
4180 : !To avoid unnecessary data replication accross the subgroups, we want to have a limited number
4181 : !of subgroup with the data of a given iatom. At the minimum, all groups have 1 atom
4182 : !We assume that the cost associated to each iatom is roughly the same
4183 324 : IF (.NOT. ALLOCATED(ri_data%iatom_to_subgroup)) THEN
4184 420 : ALLOCATE (ri_data%iatom_to_subgroup(natom), max_at_per_group(ngroups))
4185 180 : DO iatom = 1, natom
4186 120 : NULLIFY (ri_data%iatom_to_subgroup(iatom)%array)
4187 240 : ALLOCATE (ri_data%iatom_to_subgroup(iatom)%array(ngroups))
4188 420 : ri_data%iatom_to_subgroup(iatom)%array(:) = .FALSE.
4189 : END DO
4190 :
4191 60 : ub = natom/ngroups
4192 60 : IF (ub*ngroups < natom) ub = ub + 1
4193 180 : max_at_per_group(:) = MAX(1, ub)
4194 :
4195 : !We want each atom to be present the same amount of times. Some groups might have more atoms
4196 : !than other to achieve this.
4197 : ctr = 0
4198 180 : DO WHILE (MODULO(SUM(max_at_per_group), natom) /= 0)
4199 0 : igroup = MODULO(ctr, ngroups) + 1
4200 0 : max_at_per_group(igroup) = max_at_per_group(igroup) + 1
4201 60 : ctr = ctr + 1
4202 : END DO
4203 :
4204 : ctr = 0
4205 180 : DO igroup = 1, ngroups
4206 300 : DO iat = 1, max_at_per_group(igroup)
4207 120 : iatom = MODULO(ctr, natom) + 1
4208 120 : ri_data%iatom_to_subgroup(iatom)%array(igroup) = .TRUE.
4209 240 : ctr = ctr + 1
4210 : END DO
4211 : END DO
4212 : END IF
4213 :
4214 972 : ALLOCATE (bins(ngroups))
4215 972 : bins = 0.0_dp
4216 9634 : DO b_img = 1, nimg
4217 28254 : DO jatom = 1, natom
4218 65170 : DO iatom = 1, natom
4219 37240 : IF (sparsity_pattern(iatom, jatom, b_img) == -1) CYCLE
4220 59040 : igroup = MINLOC(bins, 1, MASK=ri_data%iatom_to_subgroup(iatom)%array) - 1
4221 :
4222 : !Use cost information from previous SCF if available
4223 765820 : IF (ANY(ri_data%kp_cost > EPSILON(0.0_dp))) THEN
4224 8936 : cost = ri_data%kp_cost(iatom, jatom, b_img)
4225 : ELSE
4226 2872 : cost = REAL(ri_data%bsizes_AO(iatom)*ri_data%bsizes_AO(jatom), dp)
4227 : END IF
4228 11808 : bins(igroup + 1) = bins(igroup + 1) + cost
4229 55860 : sparsity_pattern(iatom, jatom, b_img) = igroup
4230 : END DO
4231 : END DO
4232 : END DO
4233 :
4234 324 : END SUBROUTINE get_sub_dist
4235 :
4236 : ! **************************************************************************************************
4237 : !> \brief A rouine that updates the sparsity pattern for force calculation, where all i,j,b combinations
4238 : !> are visited.
4239 : !> \param force_pattern ...
4240 : !> \param scf_pattern ...
4241 : !> \param ngroups ...
4242 : !> \param ri_data ...
4243 : !> \param qs_env ...
4244 : ! **************************************************************************************************
4245 50 : SUBROUTINE update_pattern_to_forces(force_pattern, scf_pattern, ngroups, ri_data, qs_env)
4246 : INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: force_pattern, scf_pattern
4247 : INTEGER, INTENT(IN) :: ngroups
4248 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4249 : TYPE(qs_environment_type), POINTER :: qs_env
4250 :
4251 : INTEGER :: b_img, iatom, igroup, jatom, mb_img, &
4252 : natom, nimg
4253 50 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4254 :
4255 50 : natom = SIZE(scf_pattern, 2)
4256 50 : nimg = SIZE(scf_pattern, 3)
4257 :
4258 150 : ALLOCATE (bins(ngroups))
4259 150 : bins = 0.0_dp
4260 :
4261 1472 : DO b_img = 1, nimg
4262 1422 : mb_img = get_opp_index(b_img, qs_env)
4263 4316 : DO jatom = 1, natom
4264 9954 : DO iatom = 1, natom
4265 : !Important: same distribution as KS matrix, because reuse t_3c_apc
4266 28440 : igroup = MINLOC(bins, 1, MASK=ri_data%iatom_to_subgroup(iatom)%array) - 1
4267 :
4268 : !check that block not already treated
4269 5688 : IF (scf_pattern(iatom, jatom, b_img) > -1) CYCLE
4270 :
4271 : !If not, take the cost of block j, i, -b (same energy contribution)
4272 7050 : IF (mb_img > 0 .AND. mb_img <= nimg) THEN
4273 3714 : IF (scf_pattern(jatom, iatom, mb_img) == -1) CYCLE
4274 1346 : bins(igroup + 1) = bins(igroup + 1) + ri_data%kp_cost(jatom, iatom, mb_img)
4275 1346 : force_pattern(iatom, jatom, b_img) = igroup
4276 : END IF
4277 : END DO
4278 : END DO
4279 : END DO
4280 :
4281 50 : END SUBROUTINE update_pattern_to_forces
4282 :
4283 : ! **************************************************************************************************
4284 : !> \brief A routine that determines the extend of the KP RI-HFX periodic images, including for the
4285 : !> extension of the RI basis
4286 : !> \param ri_data ...
4287 : !> \param qs_env ...
4288 : ! **************************************************************************************************
4289 88 : SUBROUTINE get_kp_and_ri_images(ri_data, qs_env)
4290 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4291 : TYPE(qs_environment_type), POINTER :: qs_env
4292 :
4293 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_kp_and_ri_images'
4294 :
4295 : CHARACTER(LEN=512) :: warning_msg
4296 : INTEGER :: cell_j(3), cell_k(3), handle, i_img, iatom, ikind, j_img, jatom, jcell, katom, &
4297 : kcell, kp_index_lbounds(3), kp_index_ubounds(3), natom, ngroups, nimg, nkind, pcoord(3), &
4298 : pdims(3)
4299 88 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_AO_1, dist_AO_2, dist_RI, &
4300 88 : nRI_per_atom, present_img, RI_cells
4301 88 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
4302 : REAL(dp) :: bump_fact, dij, dik, image_range, &
4303 : RI_range, rij(3), rik(3)
4304 616 : TYPE(dbt_type) :: t_dummy
4305 : TYPE(dft_control_type), POINTER :: dft_control
4306 : TYPE(distribution_2d_type), POINTER :: dist_2d
4307 : TYPE(distribution_3d_type) :: dist_3d
4308 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4309 88 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
4310 : TYPE(kpoint_type), POINTER :: kpoints
4311 88 : TYPE(mp_cart_type) :: mp_comm_t3c
4312 : TYPE(mp_para_env_type), POINTER :: para_env
4313 : TYPE(neighbor_list_3c_iterator_type) :: nl_3c_iter
4314 : TYPE(neighbor_list_3c_type) :: nl_3c
4315 : TYPE(neighbor_list_iterator_p_type), &
4316 88 : DIMENSION(:), POINTER :: nl_iterator
4317 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4318 88 : POINTER :: nl_2c
4319 88 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4320 88 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4321 : TYPE(section_vals_type), POINTER :: hfx_section
4322 :
4323 88 : NULLIFY (qs_kind_set, dist_2d, nl_2c, nl_iterator, dft_control, &
4324 88 : particle_set, kpoints, para_env, cell_to_index, hfx_section)
4325 :
4326 88 : CALL timeset(routineN, handle)
4327 :
4328 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, &
4329 : dft_control=dft_control, particle_set=particle_set, kpoints=kpoints, &
4330 88 : para_env=para_env, natom=natom)
4331 88 : nimg = dft_control%nimages
4332 88 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
4333 352 : kp_index_lbounds = LBOUND(cell_to_index)
4334 352 : kp_index_ubounds = UBOUND(cell_to_index)
4335 :
4336 88 : hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
4337 88 : CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
4338 :
4339 620 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
4340 88 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
4341 88 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
4342 :
4343 : !In case of shortrange HFX potential, it is imprtant to be consistent with the rest of the KP
4344 : !code, and use EPS_SCHWARZ to determine the range (rather than eps_filter_2c in normal RI-HFX)
4345 88 : IF (ri_data%hfx_pot%potential_type == do_potential_short) THEN
4346 0 : CALL erfc_cutoff(ri_data%eps_schwarz, ri_data%hfx_pot%omega, ri_data%hfx_pot%cutoff_radius)
4347 : WRITE (warning_msg, '(A)') &
4348 : "The SHORTANGE HFX potential typically extends over many periodic images, "// &
4349 : "possibly slowing down the calculation. Consider using the TRUNCATED "// &
4350 0 : "potential for better computational performance."
4351 0 : CPWARN(warning_msg)
4352 : END IF
4353 :
4354 : !Determine the range for contributing periodic images, and for the RI basis extension
4355 88 : ri_data%kp_RI_range = 0.0_dp
4356 88 : ri_data%kp_image_range = 0.0_dp
4357 222 : DO ikind = 1, nkind
4358 :
4359 134 : CALL init_interaction_radii_orb_basis(basis_set_AO(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4360 134 : CALL get_gto_basis_set(basis_set_AO(ikind)%gto_basis_set, kind_radius=RI_range)
4361 134 : ri_data%kp_RI_range = MAX(RI_range, ri_data%kp_RI_range)
4362 :
4363 134 : CALL init_interaction_radii_orb_basis(basis_set_AO(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4364 134 : CALL init_interaction_radii_orb_basis(basis_set_RI(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4365 134 : CALL get_gto_basis_set(basis_set_RI(ikind)%gto_basis_set, kind_radius=image_range)
4366 :
4367 134 : image_range = 2.0_dp*image_range + cutoff_screen_factor*ri_data%hfx_pot%cutoff_radius
4368 356 : ri_data%kp_image_range = MAX(image_range, ri_data%kp_image_range)
4369 : END DO
4370 :
4371 88 : CALL section_vals_val_get(hfx_section, "KP_RI_BUMP_FACTOR", r_val=bump_fact)
4372 88 : ri_data%kp_bump_rad = bump_fact*ri_data%kp_RI_range
4373 :
4374 : !For the extent of the KP RI-HFX images, we are limited by the RI-HFX potential in
4375 : !(mu^0 sigma^a|P^0) (P^0|Q^b) (Q^b|nu^b lambda^a+c), if there is no contact between
4376 : !any P^0 and Q^b, then image b does not contribute
4377 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
4378 88 : "HFX_2c_nl_RI", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
4379 :
4380 264 : ALLOCATE (present_img(nimg))
4381 4428 : present_img = 0
4382 88 : ri_data%nimg = 0
4383 88 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4384 2078 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4385 1990 : CALL get_iterator_info(nl_iterator, r=rij, cell=cell_j)
4386 :
4387 7960 : dij = NORM2(rij)
4388 :
4389 1990 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4390 1990 : IF (j_img > nimg .OR. j_img < 1) CYCLE
4391 :
4392 1952 : IF (dij > ri_data%kp_image_range) CYCLE
4393 :
4394 1952 : ri_data%nimg = MAX(j_img, ri_data%nimg)
4395 1990 : present_img(j_img) = 1
4396 :
4397 : END DO
4398 88 : CALL neighbor_list_iterator_release(nl_iterator)
4399 88 : CALL release_neighbor_list_sets(nl_2c)
4400 88 : CALL para_env%max(ri_data%nimg)
4401 88 : IF (ri_data%nimg > nimg) &
4402 0 : CPABORT("Make sure the smallest exponent of the RI-HFX basis is larger than that of the ORB basis.")
4403 :
4404 : !Keep track of which images will not contribute, so that can be ignored before calculation
4405 88 : CALL para_env%sum(present_img)
4406 264 : ALLOCATE (ri_data%present_images(ri_data%nimg))
4407 2442 : ri_data%present_images = 0
4408 2442 : DO i_img = 1, ri_data%nimg
4409 2442 : IF (present_img(i_img) > 0) ri_data%present_images(i_img) = 1
4410 : END DO
4411 :
4412 : CALL create_3c_tensor(t_dummy, dist_AO_1, dist_AO_2, dist_RI, &
4413 : ri_data%pgrid, ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4414 88 : map1=[1, 2], map2=[3], name="(AO AO | RI)")
4415 :
4416 88 : CALL dbt_mp_environ_pgrid(ri_data%pgrid, pdims, pcoord)
4417 88 : CALL mp_comm_t3c%create(ri_data%pgrid%mp_comm_2d, 3, pdims)
4418 : CALL distribution_3d_create(dist_3d, dist_AO_1, dist_AO_2, dist_RI, &
4419 88 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
4420 88 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
4421 88 : CALL dbt_destroy(t_dummy)
4422 :
4423 : !For the extension of the RI basis P in (mu^0 sigma^a |P^i), we consider an atom if the distance,
4424 : !between mu^0 and P^i if smaller or equal to the kind radius of mu^0
4425 : CALL build_3c_neighbor_lists(nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, dist_3d, &
4426 : ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.FALSE., &
4427 88 : own_dist=.TRUE.)
4428 :
4429 176 : ALLOCATE (RI_cells(nimg))
4430 4428 : RI_cells = 0
4431 :
4432 264 : ALLOCATE (nRI_per_atom(natom))
4433 264 : nRI_per_atom = 0
4434 :
4435 88 : CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
4436 85714 : DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
4437 : CALL get_3c_iterator_info(nl_3c_iter, cell_k=cell_k, rik=rik, cell_j=cell_j, &
4438 85626 : iatom=iatom, jatom=jatom, katom=katom)
4439 342504 : dik = NORM2(rik)
4440 :
4441 599382 : IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
4442 : ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
4443 :
4444 85626 : jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4445 85626 : IF (jcell > nimg .OR. jcell < 1) CYCLE
4446 :
4447 571310 : IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
4448 : ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
4449 :
4450 77083 : kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
4451 77083 : IF (kcell > nimg .OR. kcell < 1) CYCLE
4452 :
4453 62877 : IF (dik > ri_data%kp_RI_range) CYCLE
4454 7846 : RI_cells(kcell) = 1
4455 :
4456 7934 : IF (jcell == 1 .AND. iatom == jatom) nRI_per_atom(iatom) = nRI_per_atom(iatom) + ri_data%bsizes_RI(katom)
4457 : END DO
4458 88 : CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
4459 88 : CALL neighbor_list_3c_destroy(nl_3c)
4460 88 : CALL para_env%sum(RI_cells)
4461 88 : CALL para_env%sum(nRI_per_atom)
4462 :
4463 176 : ALLOCATE (ri_data%img_to_RI_cell(nimg))
4464 88 : ri_data%ncell_RI = 0
4465 4428 : ri_data%img_to_RI_cell = 0
4466 4428 : DO i_img = 1, nimg
4467 4428 : IF (RI_cells(i_img) > 0) THEN
4468 554 : ri_data%ncell_RI = ri_data%ncell_RI + 1
4469 554 : ri_data%img_to_RI_cell(i_img) = ri_data%ncell_RI
4470 : END IF
4471 : END DO
4472 :
4473 264 : ALLOCATE (ri_data%RI_cell_to_img(ri_data%ncell_RI))
4474 4428 : DO i_img = 1, nimg
4475 4428 : IF (ri_data%img_to_RI_cell(i_img) > 0) ri_data%RI_cell_to_img(ri_data%img_to_RI_cell(i_img)) = i_img
4476 : END DO
4477 :
4478 : !Print some info
4479 88 : IF (ri_data%unit_nr > 0) THEN
4480 : WRITE (ri_data%unit_nr, FMT="(/T3,A,I29)") &
4481 44 : "KP-HFX_RI_INFO| Number of RI-KP parallel groups:", ngroups
4482 : WRITE (ri_data%unit_nr, FMT="(T3,A,I29)") &
4483 44 : "KP-HFX_RI_INFO| Tensor stack size: ", ri_data%kp_stack_size
4484 : WRITE (ri_data%unit_nr, FMT="(T3,A,F31.3,A)") &
4485 44 : "KP-HFX_RI_INFO| RI basis extension radius:", ri_data%kp_RI_range*angstrom, " Ang"
4486 : WRITE (ri_data%unit_nr, FMT="(T3,A,F12.3,A, F6.3, A)") &
4487 44 : "KP-HFX_RI_INFO| RI basis bump factor and bump radius:", bump_fact, " /", &
4488 88 : ri_data%kp_bump_rad*angstrom, " Ang"
4489 : WRITE (ri_data%unit_nr, FMT="(T3,A,I16,A)") &
4490 44 : "KP-HFX_RI_INFO| The extended RI bases cover up to ", ri_data%ncell_RI, " unit cells"
4491 : WRITE (ri_data%unit_nr, FMT="(T3,A,I18)") &
4492 132 : "KP-HFX_RI_INFO| Average number of sgf in extended RI bases:", SUM(nRI_per_atom)/natom
4493 : WRITE (ri_data%unit_nr, FMT="(T3,A,F13.3,A)") &
4494 44 : "KP-HFX_RI_INFO| Consider all image cells within a radius of ", ri_data%kp_image_range*angstrom, " Ang"
4495 : WRITE (ri_data%unit_nr, FMT="(T3,A,I27/)") &
4496 44 : "KP-HFX_RI_INFO| Number of image cells considered: ", ri_data%nimg
4497 44 : CALL m_flush(ri_data%unit_nr)
4498 : END IF
4499 :
4500 88 : CALL timestop(handle)
4501 :
4502 1056 : END SUBROUTINE get_kp_and_ri_images
4503 :
4504 : ! **************************************************************************************************
4505 : !> \brief A routine that creates tensors structure for rho_ao and 3c_ints in a stacked format for
4506 : !> the efficient contractions of rho_sigma^0,lambda^c * (mu^0 sigam^a | P) => TAS tensors
4507 : !> \param res_stack ...
4508 : !> \param rho_stack ...
4509 : !> \param ints_stack ...
4510 : !> \param rho_template ...
4511 : !> \param ints_template ...
4512 : !> \param stack_size ...
4513 : !> \param ri_data ...
4514 : !> \param qs_env ...
4515 : !> \note The result tensor has the exact same shape and distribution as the integral tensor
4516 : ! **************************************************************************************************
4517 324 : SUBROUTINE get_stack_tensors(res_stack, rho_stack, ints_stack, rho_template, ints_template, &
4518 : stack_size, ri_data, qs_env)
4519 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: res_stack, rho_stack, ints_stack
4520 : TYPE(dbt_type), INTENT(INOUT) :: rho_template, ints_template
4521 : INTEGER, INTENT(IN) :: stack_size
4522 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4523 : TYPE(qs_environment_type), POINTER :: qs_env
4524 :
4525 : INTEGER :: is, nblks, nblks_3c(3), pdims_3d(3)
4526 324 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_stack, dist1, &
4527 324 : dist2, dist3, dist_stack1, &
4528 324 : dist_stack2, dist_stack3
4529 2916 : TYPE(dbt_distribution_type) :: t_dist
4530 972 : TYPE(dbt_pgrid_type) :: pgrid
4531 : TYPE(mp_para_env_type), POINTER :: para_env
4532 :
4533 324 : NULLIFY (para_env)
4534 :
4535 324 : CALL get_qs_env(qs_env, para_env=para_env)
4536 :
4537 324 : nblks = SIZE(ri_data%bsizes_AO_split)
4538 972 : ALLOCATE (bsizes_stack(stack_size*nblks))
4539 6468 : DO is = 1, stack_size
4540 26948 : bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
4541 : END DO
4542 :
4543 2916 : ALLOCATE (dist1(nblks), dist2(nblks), dist_stack1(stack_size*nblks), dist_stack2(stack_size*nblks))
4544 324 : CALL dbt_get_info(rho_template, proc_dist_1=dist1, proc_dist_2=dist2)
4545 6468 : DO is = 1, stack_size
4546 26624 : dist_stack1((is - 1)*nblks + 1:is*nblks) = dist1(:)
4547 26948 : dist_stack2((is - 1)*nblks + 1:is*nblks) = dist2(:)
4548 : END DO
4549 :
4550 : !First 2c tensor matches the distribution of template
4551 : !It is stacked in both directions
4552 324 : CALL dbt_distribution_new(t_dist, ri_data%pgrid_2d, dist_stack1, dist_stack2)
4553 324 : CALL dbt_create(rho_stack(1), "RHO_stack", t_dist, [1], [2], bsizes_stack, bsizes_stack)
4554 324 : CALL dbt_distribution_destroy(t_dist)
4555 324 : DEALLOCATE (dist1, dist2, dist_stack1, dist_stack2)
4556 :
4557 : !Second 2c tensor has optimal distribution on the 2d pgrid
4558 324 : CALL create_2c_tensor(rho_stack(2), dist1, dist2, ri_data%pgrid_2d, bsizes_stack, bsizes_stack, name="RHO_stack")
4559 324 : DEALLOCATE (dist1, dist2)
4560 :
4561 324 : CALL dbt_get_info(ints_template, nblks_total=nblks_3c)
4562 2268 : ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)))
4563 1620 : ALLOCATE (dist_stack3(stack_size*nblks_3c(3)), bsizes_RI_ext(nblks_3c(2)))
4564 : CALL dbt_get_info(ints_template, proc_dist_1=dist1, proc_dist_2=dist2, &
4565 324 : proc_dist_3=dist3, blk_size_2=bsizes_RI_ext)
4566 6468 : DO is = 1, stack_size
4567 26948 : dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
4568 : END DO
4569 :
4570 : !First 3c tensor matches the distribution of template
4571 324 : CALL dbt_distribution_new(t_dist, ri_data%pgrid_1, dist1, dist2, dist_stack3)
4572 : CALL dbt_create(ints_stack(1), "ints_stack", t_dist, [1, 2], [3], ri_data%bsizes_AO_split, &
4573 324 : bsizes_RI_ext, bsizes_stack)
4574 324 : CALL dbt_distribution_destroy(t_dist)
4575 324 : DEALLOCATE (dist1, dist2, dist3, dist_stack3)
4576 :
4577 : !Second 3c tensor has optimal pgrid
4578 324 : pdims_3d = 0
4579 1296 : CALL dbt_pgrid_create(para_env, pdims_3d, pgrid, tensor_dims=[nblks_3c(1), nblks_3c(2), stack_size*nblks_3c(3)])
4580 : CALL create_3c_tensor(ints_stack(2), dist1, dist2, dist3, pgrid, ri_data%bsizes_AO_split, &
4581 324 : bsizes_RI_ext, bsizes_stack, [1, 2], [3], name="ints_stack")
4582 324 : DEALLOCATE (dist1, dist2, dist3)
4583 324 : CALL dbt_pgrid_destroy(pgrid)
4584 :
4585 : !The result tensor has the same shape and dist as the integral tensor
4586 324 : CALL dbt_create(ints_stack(1), res_stack(1))
4587 324 : CALL dbt_create(ints_stack(2), res_stack(2))
4588 :
4589 648 : END SUBROUTINE get_stack_tensors
4590 :
4591 : ! **************************************************************************************************
4592 : !> \brief Fill the stack of 3c tensors accrding to the order in the images input
4593 : !> \param t_3c_stack ...
4594 : !> \param t_3c_in ...
4595 : !> \param images ...
4596 : !> \param stack_dim ...
4597 : !> \param ri_data ...
4598 : !> \param filter_at ...
4599 : !> \param filter_dim ...
4600 : !> \param idx_to_at ...
4601 : !> \param img_bounds ...
4602 : ! **************************************************************************************************
4603 41263 : SUBROUTINE fill_3c_stack(t_3c_stack, t_3c_in, images, stack_dim, ri_data, filter_at, filter_dim, &
4604 41263 : idx_to_at, img_bounds)
4605 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_stack
4606 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_in
4607 : INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4608 : INTEGER, INTENT(IN) :: stack_dim
4609 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4610 : INTEGER, INTENT(IN), OPTIONAL :: filter_at, filter_dim
4611 : INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: idx_to_at
4612 : INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2)
4613 :
4614 : INTEGER :: dest(3), i_img, idx, ind(3), lb, nblks, &
4615 : nimg, offset, ub
4616 : LOGICAL :: do_filter, found
4617 41263 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4618 : TYPE(dbt_iterator_type) :: iter
4619 :
4620 : !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4621 : !in the stack tensor (corresponding to pair index). Distributions match by construction
4622 41263 : nimg = ri_data%nimg
4623 41263 : nblks = SIZE(ri_data%bsizes_AO_split)
4624 :
4625 41263 : do_filter = .FALSE.
4626 40791 : IF (PRESENT(filter_at) .AND. PRESENT(filter_dim) .AND. PRESENT(idx_to_at)) do_filter = .TRUE.
4627 :
4628 41263 : lb = 1
4629 41263 : ub = nimg
4630 41263 : offset = 0
4631 41263 : IF (PRESENT(img_bounds)) THEN
4632 41263 : lb = img_bounds(1)
4633 41263 : ub = img_bounds(2) - 1
4634 41263 : offset = lb - 1
4635 : END IF
4636 :
4637 621110 : DO idx = lb, ub
4638 579847 : i_img = images(idx)
4639 579847 : IF (i_img == 0 .OR. i_img > nimg) CYCLE
4640 :
4641 : !$OMP PARALLEL DEFAULT(NONE) &
4642 : !$OMP SHARED(idx,i_img,t_3c_in,t_3c_stack,nblks,stack_dim,filter_at,filter_dim,idx_to_at,do_filter,offset) &
4643 621110 : !$OMP PRIVATE(iter,ind,blk,found,dest)
4644 : CALL dbt_iterator_start(iter, t_3c_in(i_img))
4645 : DO WHILE (dbt_iterator_blocks_left(iter))
4646 : CALL dbt_iterator_next_block(iter, ind)
4647 : CALL dbt_get_block(t_3c_in(i_img), ind, blk, found)
4648 : IF (.NOT. found) CYCLE
4649 :
4650 : IF (do_filter) THEN
4651 : IF (.NOT. idx_to_at(ind(filter_dim)) == filter_at) CYCLE
4652 : END IF
4653 :
4654 : IF (stack_dim == 1) THEN
4655 : dest = [(idx - offset - 1)*nblks + ind(1), ind(2), ind(3)]
4656 : ELSE IF (stack_dim == 2) THEN
4657 : dest = [ind(1), (idx - offset - 1)*nblks + ind(2), ind(3)]
4658 : ELSE
4659 : dest = [ind(1), ind(2), (idx - offset - 1)*nblks + ind(3)]
4660 : END IF
4661 :
4662 : CALL dbt_put_block(t_3c_stack, dest, SHAPE(blk), blk)
4663 : DEALLOCATE (blk)
4664 : END DO
4665 : CALL dbt_iterator_stop(iter)
4666 : !$OMP END PARALLEL
4667 : END DO !i_img
4668 41263 : CALL dbt_finalize(t_3c_stack)
4669 :
4670 82526 : END SUBROUTINE fill_3c_stack
4671 :
4672 : ! **************************************************************************************************
4673 : !> \brief Fill the stack of 2c tensors based on the content of images input
4674 : !> \param t_2c_stack ...
4675 : !> \param t_2c_in ...
4676 : !> \param images ...
4677 : !> \param stack_dim ...
4678 : !> \param ri_data ...
4679 : !> \param img_bounds ...
4680 : !> \param shift ...
4681 : ! **************************************************************************************************
4682 19680 : SUBROUTINE fill_2c_stack(t_2c_stack, t_2c_in, images, stack_dim, ri_data, img_bounds, shift)
4683 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_stack
4684 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_in
4685 : INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4686 : INTEGER, INTENT(IN) :: stack_dim
4687 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4688 : INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2), shift
4689 :
4690 : INTEGER :: dest(2), i_img, idx, ind(2), lb, &
4691 : my_shift, nblks, nimg, offset, ub
4692 : LOGICAL :: found
4693 19680 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
4694 : TYPE(dbt_iterator_type) :: iter
4695 :
4696 : !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4697 : !in the stack tensor (corresponding to pair index). Distributions match by construction
4698 19680 : nimg = ri_data%nimg
4699 19680 : nblks = SIZE(ri_data%bsizes_AO_split)
4700 :
4701 19680 : lb = 1
4702 19680 : ub = nimg
4703 19680 : offset = 0
4704 19680 : IF (PRESENT(img_bounds)) THEN
4705 19680 : lb = img_bounds(1)
4706 19680 : ub = img_bounds(2) - 1
4707 19680 : offset = lb - 1
4708 : END IF
4709 :
4710 19680 : my_shift = 1
4711 19680 : IF (PRESENT(shift)) my_shift = shift
4712 :
4713 302352 : DO idx = lb, ub
4714 282672 : i_img = images(idx)
4715 282672 : IF (i_img == 0 .OR. i_img > nimg) CYCLE
4716 :
4717 : !$OMP PARALLEL DEFAULT(NONE) SHARED(idx,i_img,t_2c_in,t_2c_stack,nblks,stack_dim,offset,my_shift) &
4718 302352 : !$OMP PRIVATE(iter,ind,blk,found,dest)
4719 : CALL dbt_iterator_start(iter, t_2c_in(i_img))
4720 : DO WHILE (dbt_iterator_blocks_left(iter))
4721 : CALL dbt_iterator_next_block(iter, ind)
4722 : CALL dbt_get_block(t_2c_in(i_img), ind, blk, found)
4723 : IF (.NOT. found) CYCLE
4724 :
4725 : IF (stack_dim == 1) THEN
4726 : dest = [(idx - offset - 1)*nblks + ind(1), (my_shift - 1)*nblks + ind(2)]
4727 : ELSE
4728 : dest = [(my_shift - 1)*nblks + ind(1), (idx - offset - 1)*nblks + ind(2)]
4729 : END IF
4730 :
4731 : CALL dbt_put_block(t_2c_stack, dest, SHAPE(blk), blk)
4732 : DEALLOCATE (blk)
4733 : END DO
4734 : CALL dbt_iterator_stop(iter)
4735 : !$OMP END PARALLEL
4736 : END DO !idx
4737 19680 : CALL dbt_finalize(t_2c_stack)
4738 :
4739 39360 : END SUBROUTINE fill_2c_stack
4740 :
4741 : ! **************************************************************************************************
4742 : !> \brief Unstacks a stacked 3c tensor containing t_3c_apc
4743 : !> \param t_3c_apc ...
4744 : !> \param t_stacked ...
4745 : !> \param idx ...
4746 : ! **************************************************************************************************
4747 24152 : SUBROUTINE unstack_t_3c_apc(t_3c_apc, t_stacked, idx)
4748 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_apc, t_stacked
4749 : INTEGER, INTENT(IN) :: idx
4750 :
4751 : INTEGER :: current_idx
4752 : INTEGER, DIMENSION(3) :: ind, nblks_3c
4753 : LOGICAL :: found
4754 24152 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4755 : TYPE(dbt_iterator_type) :: iter
4756 :
4757 : !Note: t_3c_apc and t_stacked must have the same ditribution
4758 24152 : CALL dbt_get_info(t_3c_apc, nblks_total=nblks_3c)
4759 :
4760 24152 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_apc,t_stacked,idx,nblks_3c) PRIVATE(iter,ind,blk,found,current_idx)
4761 : CALL dbt_iterator_start(iter, t_stacked)
4762 : DO WHILE (dbt_iterator_blocks_left(iter))
4763 : CALL dbt_iterator_next_block(iter, ind)
4764 :
4765 : !tensor is stacked along the 3rd dimension
4766 : current_idx = (ind(3) - 1)/nblks_3c(3) + 1
4767 : IF (.NOT. idx == current_idx) CYCLE
4768 :
4769 : CALL dbt_get_block(t_stacked, ind, blk, found)
4770 : IF (.NOT. found) CYCLE
4771 :
4772 : CALL dbt_put_block(t_3c_apc, [ind(1), ind(2), ind(3) - (idx - 1)*nblks_3c(3)], SHAPE(blk), blk)
4773 : DEALLOCATE (blk)
4774 : END DO
4775 : CALL dbt_iterator_stop(iter)
4776 : !$OMP END PARALLEL
4777 :
4778 24152 : END SUBROUTINE unstack_t_3c_apc
4779 :
4780 : ! **************************************************************************************************
4781 : !> \brief copies the 3c integrals correspoinding to a single atom mu from the general (P^0| mu^0 sigam^a)
4782 : !> \param t_3c_at ...
4783 : !> \param t_3c_ints ...
4784 : !> \param iatom ...
4785 : !> \param dim_at ...
4786 : !> \param idx_to_at ...
4787 : ! **************************************************************************************************
4788 0 : SUBROUTINE get_atom_3c_ints(t_3c_at, t_3c_ints, iatom, dim_at, idx_to_at)
4789 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_at, t_3c_ints
4790 : INTEGER, INTENT(IN) :: iatom, dim_at
4791 : INTEGER, DIMENSION(:), INTENT(IN) :: idx_to_at
4792 :
4793 : INTEGER, DIMENSION(3) :: ind
4794 : LOGICAL :: found
4795 0 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4796 : TYPE(dbt_iterator_type) :: iter
4797 :
4798 0 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_ints,t_3c_at,iatom,idx_to_at,dim_at) PRIVATE(iter,ind,blk,found)
4799 : CALL dbt_iterator_start(iter, t_3c_ints)
4800 : DO WHILE (dbt_iterator_blocks_left(iter))
4801 : CALL dbt_iterator_next_block(iter, ind)
4802 : IF (.NOT. idx_to_at(ind(dim_at)) == iatom) CYCLE
4803 :
4804 : CALL dbt_get_block(t_3c_ints, ind, blk, found)
4805 : IF (.NOT. found) CYCLE
4806 :
4807 : CALL dbt_put_block(t_3c_at, ind, SHAPE(blk), blk)
4808 : DEALLOCATE (blk)
4809 : END DO
4810 : CALL dbt_iterator_stop(iter)
4811 : !$OMP END PARALLEL
4812 0 : CALL dbt_finalize(t_3c_at)
4813 :
4814 0 : END SUBROUTINE get_atom_3c_ints
4815 :
4816 : ! **************************************************************************************************
4817 : !> \brief Precalculate the 3c and 2c derivatives tensors
4818 : !> \param t_3c_der_RI ...
4819 : !> \param t_3c_der_AO ...
4820 : !> \param mat_der_pot ...
4821 : !> \param t_2c_der_metric ...
4822 : !> \param ri_data ...
4823 : !> \param qs_env ...
4824 : ! **************************************************************************************************
4825 50 : SUBROUTINE precalc_derivatives(t_3c_der_RI, t_3c_der_AO, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
4826 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_RI, t_3c_der_AO
4827 : TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot
4828 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_2c_der_metric
4829 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4830 : TYPE(qs_environment_type), POINTER :: qs_env
4831 :
4832 : CHARACTER(LEN=*), PARAMETER :: routineN = 'precalc_derivatives'
4833 :
4834 : INTEGER :: handle, handle2, i_img, i_mem, i_RI, &
4835 : i_xyz, iatom, n_mem, natom, nblks_RI, &
4836 : ncell_RI, nimg, nkind, nthreads
4837 : INTEGER(int_8) :: nze
4838 50 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, dist_AO_1, &
4839 100 : dist_AO_2, dist_RI, dist_RI_ext, dummy_end, dummy_start, end_blocks, start_blocks
4840 : INTEGER, DIMENSION(3) :: pcoord, pdims
4841 100 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
4842 : REAL(dp) :: occ
4843 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
4844 : TYPE(dbcsr_type) :: dbcsr_template
4845 50 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_metric
4846 450 : TYPE(dbt_distribution_type) :: t_dist
4847 150 : TYPE(dbt_pgrid_type) :: pgrid
4848 450 : TYPE(dbt_type) :: t_3c_template
4849 50 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_AO_prv, t_3c_der_RI_prv
4850 : TYPE(dft_control_type), POINTER :: dft_control
4851 : TYPE(distribution_2d_type), POINTER :: dist_2d
4852 : TYPE(distribution_3d_type) :: dist_3d
4853 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4854 50 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
4855 50 : TYPE(mp_cart_type) :: mp_comm_t3c
4856 : TYPE(mp_para_env_type), POINTER :: para_env
4857 : TYPE(neighbor_list_3c_type) :: nl_3c
4858 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4859 50 : POINTER :: nl_2c
4860 50 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4861 50 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4862 :
4863 50 : NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env, row_bsize, col_bsize)
4864 :
4865 50 : CALL timeset(routineN, handle)
4866 :
4867 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
4868 50 : particle_set=particle_set, dft_control=dft_control, para_env=para_env)
4869 :
4870 50 : nimg = ri_data%nimg
4871 50 : ncell_RI = ri_data%ncell_RI
4872 :
4873 356 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
4874 50 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
4875 50 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
4876 50 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
4877 50 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
4878 :
4879 : !Dealing with the 3c derivatives
4880 50 : nthreads = 1
4881 50 : !$ nthreads = omp_get_num_threads()
4882 50 : pdims = 0
4883 200 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
4884 :
4885 : CALL create_3c_tensor(t_3c_template, dist_AO_1, dist_AO_2, dist_RI, pgrid, &
4886 : ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4887 50 : map1=[1, 2], map2=[3], name="tmp")
4888 50 : CALL dbt_destroy(t_3c_template)
4889 :
4890 : !We stack the RI basis images. Keep consistent distribution
4891 50 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
4892 150 : ALLOCATE (dist_RI_ext(natom*ncell_RI))
4893 100 : ALLOCATE (bsizes_RI_ext(natom*ncell_RI))
4894 150 : ALLOCATE (bsizes_RI_ext_split(nblks_RI*ncell_RI))
4895 366 : DO i_RI = 1, ncell_RI
4896 948 : bsizes_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
4897 948 : dist_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = dist_RI(:)
4898 1802 : bsizes_RI_ext_split((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI_split(:)
4899 : END DO
4900 :
4901 50 : CALL dbt_distribution_new(t_dist, pgrid, dist_AO_1, dist_AO_2, dist_RI_ext)
4902 : CALL dbt_create(t_3c_template, "KP_3c_der", t_dist, [1, 2], [3], &
4903 50 : ri_data%bsizes_AO, ri_data%bsizes_AO, bsizes_RI_ext)
4904 50 : CALL dbt_distribution_destroy(t_dist)
4905 :
4906 10132 : ALLOCATE (t_3c_der_RI_prv(nimg, 1, 3), t_3c_der_AO_prv(nimg, 1, 3))
4907 200 : DO i_xyz = 1, 3
4908 4466 : DO i_img = 1, nimg
4909 4266 : CALL dbt_create(t_3c_template, t_3c_der_RI_prv(i_img, 1, i_xyz))
4910 4416 : CALL dbt_create(t_3c_template, t_3c_der_AO_prv(i_img, 1, i_xyz))
4911 : END DO
4912 : END DO
4913 50 : CALL dbt_destroy(t_3c_template)
4914 :
4915 50 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
4916 50 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
4917 : CALL distribution_3d_create(dist_3d, dist_AO_1, dist_AO_2, dist_RI, &
4918 50 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
4919 50 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
4920 50 : CALL dbt_pgrid_destroy(pgrid)
4921 :
4922 : CALL build_3c_neighbor_lists(nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, dist_3d, ri_data%ri_metric, &
4923 50 : "HFX_3c_nl", qs_env, op_pos=2, sym_jk=.FALSE., own_dist=.TRUE.)
4924 :
4925 50 : n_mem = ri_data%n_mem
4926 : CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
4927 : start_blocks, end_blocks)
4928 50 : DEALLOCATE (dummy_start, dummy_end)
4929 :
4930 : CALL create_3c_tensor(t_3c_template, dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid_2, &
4931 : bsizes_RI_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
4932 50 : map1=[1], map2=[2, 3], name="der (RI | AO AO)")
4933 200 : DO i_xyz = 1, 3
4934 4466 : DO i_img = 1, nimg
4935 4266 : CALL dbt_create(t_3c_template, t_3c_der_RI(i_img, i_xyz))
4936 4416 : CALL dbt_create(t_3c_template, t_3c_der_AO(i_img, i_xyz))
4937 : END DO
4938 : END DO
4939 :
4940 140 : DO i_mem = 1, n_mem
4941 : CALL build_3c_derivatives(t_3c_der_AO_prv, t_3c_der_RI_prv, ri_data%filter_eps, qs_env, &
4942 : nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, &
4943 : ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=2, &
4944 : do_kpoints=.TRUE., do_hfx_kpoints=.TRUE., &
4945 : bounds_k=[start_blocks(i_mem), end_blocks(i_mem)], &
4946 270 : RI_range=ri_data%kp_RI_range, img_to_RI_cell=ri_data%img_to_RI_cell)
4947 :
4948 90 : CALL timeset(routineN//"_cpy", handle2)
4949 : !We go from (mu^0 sigma^i | P^j) to (P^i| sigma^j mu^0) and finally to (P^i| mu^0 sigma^j)
4950 2834 : DO i_img = 1, nimg
4951 11066 : DO i_xyz = 1, 3
4952 : !derivative wrt to mu^0
4953 8232 : CALL get_tensor_occupancy(t_3c_der_AO_prv(i_img, 1, i_xyz), nze, occ)
4954 8232 : IF (nze > 0) THEN
4955 : CALL dbt_copy(t_3c_der_AO_prv(i_img, 1, i_xyz), t_3c_template, &
4956 4512 : order=[3, 2, 1], move_data=.TRUE.)
4957 4512 : CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4958 : CALL dbt_copy(t_3c_template, t_3c_der_AO(i_img, i_xyz), &
4959 4512 : order=[1, 3, 2], move_data=.TRUE., summation=.TRUE.)
4960 : END IF
4961 :
4962 : !derivative wrt to P^i
4963 8232 : CALL get_tensor_occupancy(t_3c_der_RI_prv(i_img, 1, i_xyz), nze, occ)
4964 19208 : IF (nze > 0) THEN
4965 : CALL dbt_copy(t_3c_der_RI_prv(i_img, 1, i_xyz), t_3c_template, &
4966 4482 : order=[3, 2, 1], move_data=.TRUE.)
4967 4482 : CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4968 : CALL dbt_copy(t_3c_template, t_3c_der_RI(i_img, i_xyz), &
4969 4482 : order=[1, 3, 2], move_data=.TRUE., summation=.TRUE.)
4970 : END IF
4971 : END DO
4972 : END DO
4973 230 : CALL timestop(handle2)
4974 : END DO
4975 50 : CALL dbt_destroy(t_3c_template)
4976 :
4977 50 : CALL neighbor_list_3c_destroy(nl_3c)
4978 200 : DO i_xyz = 1, 3
4979 4466 : DO i_img = 1, nimg
4980 4266 : CALL dbt_destroy(t_3c_der_RI_prv(i_img, 1, i_xyz))
4981 4416 : CALL dbt_destroy(t_3c_der_AO_prv(i_img, 1, i_xyz))
4982 : END DO
4983 : END DO
4984 8582 : DEALLOCATE (t_3c_der_RI_prv, t_3c_der_AO_prv)
4985 :
4986 : !Reorder 3c derivatives to be consistant with ints
4987 50 : CALL reorder_3c_derivs(t_3c_der_RI, ri_data)
4988 50 : CALL reorder_3c_derivs(t_3c_der_AO, ri_data)
4989 :
4990 50 : CALL timeset(routineN//"_2c", handle2)
4991 : !The 2-center derivatives
4992 50 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
4993 150 : ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
4994 100 : ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
4995 150 : row_bsize(:) = ri_data%bsizes_RI
4996 150 : col_bsize(:) = ri_data%bsizes_RI
4997 :
4998 : CALL dbcsr_create(dbcsr_template, "2c_der", dbcsr_dist, dbcsr_type_no_symmetry, &
4999 50 : row_bsize, col_bsize)
5000 50 : CALL dbcsr_distribution_release(dbcsr_dist)
5001 50 : DEALLOCATE (col_bsize, row_bsize)
5002 :
5003 4566 : ALLOCATE (mat_der_metric(nimg, 3))
5004 200 : DO i_xyz = 1, 3
5005 4466 : DO i_img = 1, nimg
5006 4266 : CALL dbcsr_create(mat_der_pot(i_img, i_xyz), template=dbcsr_template)
5007 4416 : CALL dbcsr_create(mat_der_metric(i_img, i_xyz), template=dbcsr_template)
5008 : END DO
5009 : END DO
5010 50 : CALL dbcsr_release(dbcsr_template)
5011 :
5012 : !HFX potential derivatives
5013 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
5014 50 : "HFX_2c_nl_pot", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
5015 : CALL build_2c_derivatives(mat_der_pot, ri_data%filter_eps_2c, qs_env, nl_2c, &
5016 50 : basis_set_RI, basis_set_RI, ri_data%hfx_pot, do_kpoints=.TRUE.)
5017 50 : CALL release_neighbor_list_sets(nl_2c)
5018 :
5019 : !RI metric derivatives
5020 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
5021 50 : "HFX_2c_nl_pot", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
5022 : CALL build_2c_derivatives(mat_der_metric, ri_data%filter_eps_2c, qs_env, nl_2c, &
5023 50 : basis_set_RI, basis_set_RI, ri_data%ri_metric, do_kpoints=.TRUE.)
5024 50 : CALL release_neighbor_list_sets(nl_2c)
5025 :
5026 : !Get into extended RI basis and tensor format
5027 200 : DO i_xyz = 1, 3
5028 450 : DO iatom = 1, natom
5029 300 : CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_der_metric(iatom, i_xyz))
5030 : CALL get_ext_2c_int(t_2c_der_metric(iatom, i_xyz), mat_der_metric(:, i_xyz), &
5031 450 : iatom, iatom, 1, ri_data, qs_env)
5032 : END DO
5033 4466 : DO i_img = 1, nimg
5034 4416 : CALL dbcsr_release(mat_der_metric(i_img, i_xyz))
5035 : END DO
5036 : END DO
5037 50 : CALL timestop(handle2)
5038 :
5039 50 : CALL timestop(handle)
5040 :
5041 300 : END SUBROUTINE precalc_derivatives
5042 :
5043 : ! **************************************************************************************************
5044 : !> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
5045 : !> \param force ...
5046 : !> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
5047 : !> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
5048 : !> \param atom_of_kind ...
5049 : !> \param kind_of ...
5050 : !> \param img in which periodic image the second center of the tensor is
5051 : !> \param pref ...
5052 : !> \param ri_data ...
5053 : !> \param qs_env ...
5054 : !> \param work_virial ...
5055 : !> \param cell ...
5056 : !> \param particle_set ...
5057 : !> \param diag ...
5058 : !> \param offdiag ...
5059 : !> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution. Atomic block sizes are
5060 : !> assumed
5061 : ! **************************************************************************************************
5062 3705 : SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, img, pref, &
5063 : ri_data, qs_env, work_virial, cell, particle_set, diag, offdiag)
5064 :
5065 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
5066 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
5067 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_der
5068 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
5069 : INTEGER, INTENT(IN) :: img
5070 : REAL(dp), INTENT(IN) :: pref
5071 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
5072 : TYPE(qs_environment_type), POINTER :: qs_env
5073 : REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
5074 : TYPE(cell_type), OPTIONAL, POINTER :: cell
5075 : TYPE(particle_type), DIMENSION(:), OPTIONAL, &
5076 : POINTER :: particle_set
5077 : LOGICAL, INTENT(IN), OPTIONAL :: diag, offdiag
5078 :
5079 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_2c_der_force'
5080 :
5081 : INTEGER :: handle, i_img, i_RI, i_xyz, iat, &
5082 : iat_of_kind, ikind, j_img, j_RI, &
5083 : j_xyz, jat, jat_of_kind, jkind, natom
5084 : INTEGER, DIMENSION(2) :: ind
5085 3705 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
5086 : LOGICAL :: found, my_diag, my_offdiag, use_virial
5087 : REAL(dp) :: new_force
5088 3705 : REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
5089 : REAL(dp), DIMENSION(3) :: scoord
5090 : TYPE(dbt_iterator_type) :: iter
5091 : TYPE(kpoint_type), POINTER :: kpoints
5092 :
5093 3705 : NULLIFY (kpoints, index_to_cell)
5094 :
5095 : !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
5096 : !update the relevant force
5097 :
5098 3705 : CALL timeset(routineN, handle)
5099 :
5100 3705 : use_virial = .FALSE.
5101 3705 : IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .TRUE.
5102 :
5103 3705 : my_diag = .FALSE.
5104 3705 : IF (PRESENT(diag)) my_diag = diag
5105 :
5106 2964 : my_offdiag = .FALSE.
5107 2964 : IF (PRESENT(diag)) my_offdiag = offdiag
5108 :
5109 3705 : CALL get_qs_env(qs_env, kpoints=kpoints, natom=natom)
5110 3705 : CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
5111 :
5112 : !$OMP PARALLEL DEFAULT(NONE) &
5113 : !$OMP SHARED(t_2c_der,t_2c_contr,work_virial,force,use_virial,natom,index_to_cell,ri_data,img) &
5114 : !$OMP SHARED(pref,atom_of_kind,kind_of,particle_set,cell,my_diag,my_offdiag) &
5115 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,i_RI,i_img,j_RI,j_img) &
5116 3705 : !$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
5117 : DO i_xyz = 1, 3
5118 : CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
5119 : DO WHILE (dbt_iterator_blocks_left(iter))
5120 : CALL dbt_iterator_next_block(iter, ind)
5121 :
5122 : !Only take forecs due to block diagonal or block off-diagonal, depending on arguments
5123 : IF ((my_diag .AND. .NOT. my_offdiag) .OR. (.NOT. my_diag .AND. my_offdiag)) THEN
5124 : IF (my_diag .AND. (ind(1) /= ind(2))) CYCLE
5125 : IF (my_offdiag .AND. (ind(1) == ind(2))) CYCLE
5126 : END IF
5127 :
5128 : CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
5129 : CPASSERT(found)
5130 : CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
5131 :
5132 : IF (found) THEN
5133 :
5134 : !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
5135 : !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
5136 : new_force = pref*SUM(der_blk(:, :)*contr_blk(:, :))
5137 :
5138 : i_RI = (ind(1) - 1)/natom + 1
5139 : i_img = ri_data%RI_cell_to_img(i_RI)
5140 : iat = ind(1) - (i_RI - 1)*natom
5141 : iat_of_kind = atom_of_kind(iat)
5142 : ikind = kind_of(iat)
5143 :
5144 : j_RI = (ind(2) - 1)/natom + 1
5145 : j_img = ri_data%RI_cell_to_img(j_RI)
5146 : jat = ind(2) - (j_RI - 1)*natom
5147 : jat_of_kind = atom_of_kind(jat)
5148 : jkind = kind_of(jat)
5149 :
5150 : !Force on iatom (first center)
5151 : !$OMP ATOMIC
5152 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5153 : + new_force
5154 :
5155 : IF (use_virial) THEN
5156 :
5157 : CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5158 : scoord(:) = scoord(:) + REAL(index_to_cell(:, i_img), dp)
5159 :
5160 : DO j_xyz = 1, 3
5161 : !$OMP ATOMIC
5162 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5163 : END DO
5164 : END IF
5165 :
5166 : !Force on jatom (second center)
5167 : !$OMP ATOMIC
5168 : force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5169 : - new_force
5170 :
5171 : IF (use_virial) THEN
5172 :
5173 : CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5174 : scoord(:) = scoord(:) + REAL(index_to_cell(:, j_img) + index_to_cell(:, img), dp)
5175 :
5176 : DO j_xyz = 1, 3
5177 : !$OMP ATOMIC
5178 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) - new_force*scoord(j_xyz)
5179 : END DO
5180 : END IF
5181 :
5182 : DEALLOCATE (contr_blk)
5183 : END IF
5184 :
5185 : DEALLOCATE (der_blk)
5186 : END DO !iter
5187 : CALL dbt_iterator_stop(iter)
5188 :
5189 : END DO !i_xyz
5190 : !$OMP END PARALLEL
5191 3705 : CALL timestop(handle)
5192 :
5193 7410 : END SUBROUTINE get_2c_der_force
5194 :
5195 : ! **************************************************************************************************
5196 : !> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
5197 : !> force = sum_ijk A_ijk B_ijk., the B tensor is (P^0| sigma^0 lambda^img), with P in the
5198 : !> extended RI basis. Note that all tensors are stacked along the 3rd dimension
5199 : !> \param force ...
5200 : !> \param t_3c_contr ...
5201 : !> \param t_3c_der_1 ...
5202 : !> \param t_3c_der_2 ...
5203 : !> \param atom_of_kind ...
5204 : !> \param kind_of ...
5205 : !> \param idx_to_at_RI ...
5206 : !> \param idx_to_at_AO ...
5207 : !> \param i_images ...
5208 : !> \param lb_img ...
5209 : !> \param pref ...
5210 : !> \param ri_data ...
5211 : !> \param qs_env ...
5212 : !> \param work_virial ...
5213 : !> \param cell ...
5214 : !> \param particle_set ...
5215 : ! **************************************************************************************************
5216 2654 : SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der_1, t_3c_der_2, atom_of_kind, kind_of, &
5217 5308 : idx_to_at_RI, idx_to_at_AO, i_images, lb_img, pref, &
5218 : ri_data, qs_env, work_virial, cell, particle_set)
5219 :
5220 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
5221 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
5222 : TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der_1, t_3c_der_2
5223 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at_RI, &
5224 : idx_to_at_AO, i_images
5225 : INTEGER, INTENT(IN) :: lb_img
5226 : REAL(dp), INTENT(IN) :: pref
5227 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
5228 : TYPE(qs_environment_type), POINTER :: qs_env
5229 : REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
5230 : TYPE(cell_type), OPTIONAL, POINTER :: cell
5231 : TYPE(particle_type), DIMENSION(:), OPTIONAL, &
5232 : POINTER :: particle_set
5233 :
5234 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_force_from_3c_trace'
5235 :
5236 : INTEGER :: handle, i_RI, i_xyz, iat, iat_of_kind, idx, ikind, j_xyz, jat, jat_of_kind, &
5237 : jkind, kat, kat_of_kind, kkind, nblks_AO, nblks_RI, RI_img
5238 : INTEGER, DIMENSION(3) :: ind
5239 2654 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
5240 : LOGICAL :: found, found_1, found_2, use_virial
5241 : REAL(dp) :: new_force
5242 2654 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk_1, der_blk_2, &
5243 2654 : der_blk_3
5244 : REAL(dp), DIMENSION(3) :: scoord
5245 : TYPE(dbt_iterator_type) :: iter
5246 : TYPE(kpoint_type), POINTER :: kpoints
5247 :
5248 2654 : NULLIFY (kpoints, index_to_cell)
5249 :
5250 2654 : CALL timeset(routineN, handle)
5251 :
5252 2654 : CALL get_qs_env(qs_env, kpoints=kpoints)
5253 2654 : CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
5254 :
5255 2654 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
5256 2654 : nblks_AO = SIZE(ri_data%bsizes_AO_split)
5257 :
5258 2654 : use_virial = .FALSE.
5259 2654 : IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .TRUE.
5260 :
5261 : !$OMP PARALLEL DEFAULT(NONE) &
5262 : !$OMP SHARED(t_3c_der_1, t_3c_der_2,t_3c_contr,work_virial,force,use_virial,index_to_cell,i_images,lb_img) &
5263 : !$OMP SHARED(pref,idx_to_at_AO,atom_of_kind,kind_of,particle_set,cell,idx_to_at_RI,ri_data,nblks_RI,nblks_AO) &
5264 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk_1,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord) &
5265 2654 : !$OMP PRIVATE(jat,kat,jat_of_kind,kat_of_kind,jkind,kkind,i_RI,RI_img,der_blk_2,der_blk_3,found_1,found_2,idx)
5266 : CALL dbt_iterator_start(iter, t_3c_contr)
5267 : DO WHILE (dbt_iterator_blocks_left(iter))
5268 : CALL dbt_iterator_next_block(iter, ind)
5269 :
5270 : CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
5271 : IF (found) THEN
5272 :
5273 : DO i_xyz = 1, 3
5274 : CALL dbt_get_block(t_3c_der_1(i_xyz), ind, der_blk_1, found_1)
5275 : IF (.NOT. found_1) THEN
5276 : DEALLOCATE (der_blk_1)
5277 : ALLOCATE (der_blk_1(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5278 : der_blk_1(:, :, :) = 0.0_dp
5279 : END IF
5280 : CALL dbt_get_block(t_3c_der_2(i_xyz), ind, der_blk_2, found_2)
5281 : IF (.NOT. found_2) THEN
5282 : DEALLOCATE (der_blk_2)
5283 : ALLOCATE (der_blk_2(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5284 : der_blk_2(:, :, :) = 0.0_dp
5285 : END IF
5286 :
5287 : ALLOCATE (der_blk_3(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5288 : der_blk_3(:, :, :) = -(der_blk_1(:, :, :) + der_blk_2(:, :, :))
5289 :
5290 : !We assume the tensors are in the format (P^0| sigma^0 mu^a+c-b), with P a member of the
5291 : !extended RI basis set
5292 :
5293 : !Force for the first center (RI extended basis, zero cell)
5294 : new_force = pref*SUM(der_blk_1(:, :, :)*contr_blk(:, :, :))
5295 :
5296 : i_RI = (ind(1) - 1)/nblks_RI + 1
5297 : RI_img = ri_data%RI_cell_to_img(i_RI)
5298 : iat = idx_to_at_RI(ind(1) - (i_RI - 1)*nblks_RI)
5299 : iat_of_kind = atom_of_kind(iat)
5300 : ikind = kind_of(iat)
5301 :
5302 : !$OMP ATOMIC
5303 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5304 : + new_force
5305 :
5306 : IF (use_virial) THEN
5307 :
5308 : CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5309 : scoord(:) = scoord(:) + REAL(index_to_cell(:, RI_img), dp)
5310 :
5311 : DO j_xyz = 1, 3
5312 : !$OMP ATOMIC
5313 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5314 : END DO
5315 : END IF
5316 :
5317 : !Force with respect to the second center (AO basis, zero cell)
5318 : new_force = pref*SUM(der_blk_2(:, :, :)*contr_blk(:, :, :))
5319 : jat = idx_to_at_AO(ind(2))
5320 : jat_of_kind = atom_of_kind(jat)
5321 : jkind = kind_of(jat)
5322 :
5323 : !$OMP ATOMIC
5324 : force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5325 : + new_force
5326 :
5327 : IF (use_virial) THEN
5328 :
5329 : CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5330 :
5331 : DO j_xyz = 1, 3
5332 : !$OMP ATOMIC
5333 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5334 : END DO
5335 : END IF
5336 :
5337 : !Force with respect to the third center (AO basis, apc_img - b_img)
5338 : !Note: tensors are stacked along the 3rd direction
5339 : new_force = pref*SUM(der_blk_3(:, :, :)*contr_blk(:, :, :))
5340 : idx = (ind(3) - 1)/nblks_AO + 1
5341 : kat = idx_to_at_AO(ind(3) - (idx - 1)*nblks_AO)
5342 : kat_of_kind = atom_of_kind(kat)
5343 : kkind = kind_of(kat)
5344 :
5345 : !$OMP ATOMIC
5346 : force(kkind)%fock_4c(i_xyz, kat_of_kind) = force(kkind)%fock_4c(i_xyz, kat_of_kind) &
5347 : + new_force
5348 :
5349 : IF (use_virial) THEN
5350 : CALL real_to_scaled(scoord, pbc(particle_set(kat)%r, cell), cell)
5351 : scoord(:) = scoord(:) + REAL(index_to_cell(:, i_images(lb_img - 1 + idx)), dp)
5352 :
5353 : DO j_xyz = 1, 3
5354 : !$OMP ATOMIC
5355 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5356 : END DO
5357 : END IF
5358 :
5359 : DEALLOCATE (der_blk_1, der_blk_2, der_blk_3)
5360 : END DO !i_xyz
5361 : DEALLOCATE (contr_blk)
5362 : END IF !found
5363 : END DO !iter
5364 : CALL dbt_iterator_stop(iter)
5365 : !$OMP END PARALLEL
5366 2654 : CALL timestop(handle)
5367 :
5368 5308 : END SUBROUTINE get_force_from_3c_trace
5369 :
5370 : END MODULE hfx_ri_kp
|