LCOV - code coverage report
Current view: top level - src - gw_large_cell_gamma_ri_rs.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:561f475) Lines: 98.5 % 481 474
Test Date: 2026-06-21 06:48:54 Functions: 100.0 % 16 16

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief GW using RI-RS Approximation
      10              : !> \par History
      11              : !>      04.2026 created [Ritaj Tyagi]
      12              : ! **************************************************************************************************
      13              : MODULE gw_large_cell_Gamma_ri_rs
      14              :    USE atomic_kind_types,               ONLY: atomic_kind_type
      15              :    USE basis_set_types,                 ONLY: gto_basis_set_type
      16              :    USE cell_types,                      ONLY: cell_type,&
      17              :                                               pbc
      18              :    USE cp_dbcsr_api,                    ONLY: &
      19              :         dbcsr_add, dbcsr_binary_read, dbcsr_binary_write, dbcsr_copy, dbcsr_create, &
      20              :         dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, &
      21              :         dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
      22              :         dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
      23              :         dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, dbcsr_p_type, dbcsr_put_block, &
      24              :         dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_type, dbcsr_type_no_symmetry
      25              :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      26              :                                               copy_fm_to_dbcsr,&
      27              :                                               dbcsr_deallocate_matrix_set
      28              :    USE cp_files,                        ONLY: close_file,&
      29              :                                               open_file
      30              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      31              :                                               cp_fm_release,&
      32              :                                               cp_fm_type
      33              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      34              :                                               cp_logger_type
      35              :    USE cp_output_handling,              ONLY: cp_p_file,&
      36              :                                               cp_print_key_should_output
      37              :    USE gw_integrals,                    ONLY: build_3c_integral_block
      38              :    USE gw_large_cell_gamma,             ONLY: G_occ_vir,&
      39              :                                               compute_QP_energies,&
      40              :                                               delete_unnecessary_files,&
      41              :                                               fill_fm_Sigma_c_Gamma_time,&
      42              :                                               get_W_MIC,&
      43              :                                               multiply_fm_W_MIC_time_with_Minv_Gamma
      44              :    USE gw_utils,                        ONLY: de_init_bs_env
      45              :    USE input_section_types,             ONLY: section_vals_type
      46              :    USE kinds,                           ONLY: dp
      47              :    USE machine,                         ONLY: m_walltime
      48              :    USE message_passing,                 ONLY: mp_para_env_type
      49              :    USE mp2_ri_2c,                       ONLY: RI_2c_integral_mat
      50              :    USE orbital_pointers,                ONLY: indco,&
      51              :                                               ncoset
      52              :    USE particle_types,                  ONLY: particle_type
      53              :    USE post_scf_bandstructure_types,    ONLY: post_scf_bandstructure_type
      54              :    USE qs_environment_types,            ONLY: get_qs_env,&
      55              :                                               qs_environment_type
      56              :    USE qs_kind_types,                   ONLY: get_qs_kind,&
      57              :                                               qs_kind_type
      58              : #include "./base/base_uses.f90"
      59              : 
      60              :    IMPLICIT NONE
      61              : 
      62              :    PRIVATE
      63              : 
      64              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'gw_large_cell_Gamma_ri_rs'
      65              : 
      66              :    PUBLIC :: gw_calc_large_cell_Gamma_ri_rs
      67              : 
      68              : CONTAINS
      69              : 
      70              : ! **************************************************************************************************
      71              : !> \brief GW calculation using RI-RS formalism
      72              : !> \param qs_env ...
      73              : !> \param bs_env ...
      74              : ! **************************************************************************************************
      75              : 
      76           10 :    SUBROUTINE gw_calc_large_cell_Gamma_ri_rs(qs_env, bs_env)
      77              : 
      78              :       TYPE(qs_environment_type), POINTER                 :: qs_env
      79              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
      80              : 
      81              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'gw_calc_large_cell_Gamma_ri_rs'
      82              : 
      83              :       INTEGER                                            :: handle
      84           10 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: fm_Sigma_x_Gamma, fm_W_MIC_time
      85           10 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: fm_Sigma_c_Gamma_time
      86              : 
      87           10 :       CALL timeset(routineN, handle)
      88              : 
      89              :        !!========================================================================
      90              :        !! 1. Grid Generation for RI-RS
      91              :        !!    (Modified Lebedev grids from Ivan Duchemin and Xavier Blase)
      92              :        !!    Generate flattened 1D array of grid points for RI-RS.
      93              :        !!    Equation: r_g(k) = R_A + r_g(A)
      94              :        !!========================================================================
      95           10 :       CALL ri_rs_grid_assembler(qs_env, bs_env, bs_env%ri_rs%grid_points)
      96              : 
      97              :        !!========================================================================
      98              :        !! 2. Atomic Basis Evaluation
      99              :        !!    Compute values of spherical atomic basis functions at grid points.
     100              :        !!    Expression: Φ_μl = Φ_μ(r_l) (mat_phi_mu_l)
     101              :        !!========================================================================
     102              :       CALL atomic_basis_at_grid_point(qs_env, bs_env, bs_env%ri_rs%grid_points, &
     103           10 :                                       bs_env%ri_rs%mat_phi_mu_l)
     104              : 
     105              :        !!========================================================================
     106              :        !! 3. Compute RI-RS Coefficients (Z_lp)
     107              :        !!    Solve the regularized system for each atom P, where the grid domain
     108              :        !!    is restricted to r_l within a cutoff distance of atom P:
     109              :        !!    a. D_ll' = [ Σ_μ Φ_μ(r_l) Φ_μ(r_l') ]^2 (Equation 13)
     110              :        !!    b. D_lP  = Σ_{μν} Φ_μ(r_l) Φ_ν(r_l) (μν|P) (Equation 15)
     111              :        !!    c. Conditioning:
     112              :        !!       Dvec_l   = 1 / sqrt(D_ll)  (Diagonal scaling vector)
     113              :        !!       D'_ll' = Dvec_l * D_ll' * Dvec_l' + λδ_ll'
     114              :        !!       D'_lP  = Dvec_l * D_lP
     115              :        !!    d. Solve: Σ_l' D'_ll' * Z'_l'P = D'_lP (Equation 14)
     116              :        !!    e. Rescale: Z_lP = Z'_lP * Dvec_l   (Z_lP stored in mat_Z_lP)
     117              :        !!========================================================================
     118              :       CALL compute_coeff_Z_lP(qs_env, bs_env, bs_env%ri_rs%grid_points, &
     119           10 :                               bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
     120              : 
     121              :        !!========================================================================
     122              :        !! 4. Compute Independent-Particle Polarizability (χ)
     123              :        !!    G^occ_µλ(i|τ|,k=0) = sum_n^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
     124              :        !!    G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
     125              :        !!    G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
     126              :        !!    G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
     127              :        !!    χ_ll'(iτ,k=0)       = G^occ_ll'(i|τ|,k=0) * G^vir_ll'(i|τ|,k=0)
     128              :        !!    χ_PQ(iτ,k=0)        = sum_ll' Z_lP χ_ll'(iτ,k=0) Z_l'Q
     129              :        !!========================================================================
     130              :       CALL get_mat_chi_Gamma_tau(bs_env, bs_env%mat_chi_Gamma_tau, &
     131           10 :                                  bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
     132              : 
     133              :        !!========================================================================
     134              :        !! 5. Compute Screened Interaction (W^MIC)
     135              :        !!    χ_PQ(iτ,k=0) -> χ_PQ(iω,k) -> ε_PQ(iω,k) -> W_PQ(iω,k) -> W^MIC_PQ(iτ)
     136              :        !!========================================================================
     137           10 :       CALL get_W_MIC(bs_env, qs_env, bs_env%mat_chi_Gamma_tau, fm_W_MIC_time)
     138              : 
     139              :        !!========================================================================
     140              :        !! 6. Compute Exact Exchange Self-Energy (Σ^x)
     141              :        !!    D_µν          = sum_n^occ C_µn(k=0) C_νn(k=0)
     142              :        !!    D_ll'         = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
     143              :        !!    V^trunc_ll'   = sum_PQ Z_lP V^trunc_PQ Z_l'Q
     144              :        !!    Σ^x_ll'       = D_ll' * V^trunc_ll'
     145              :        !!    Σ^x_λσ(k=0)   = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
     146              :        !!========================================================================
     147              :       CALL compute_Sigma_x(bs_env, qs_env, bs_env%ri_rs%mat_phi_mu_l, &
     148           10 :                            bs_env%ri_rs%mat_Z_lP, fm_Sigma_x_Gamma)
     149              : 
     150              :        !!========================================================================
     151              :        !! 7. Compute Correlation Self-Energy (Σ^c)
     152              :        !!    W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
     153              :        !!    Σ^c_ll'(iτ,k=0)   = -G^occ_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ < 0
     154              :        !!    Σ^c_ll'(iτ,k=0)   =  G^vir_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ > 0
     155              :        !!    Σ^c_λσ(iτ,k=0)    = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l')
     156              :        !!========================================================================
     157              :       CALL compute_Sigma_c(bs_env, fm_W_MIC_time, bs_env%ri_rs%mat_phi_mu_l, &
     158           10 :                            bs_env%ri_rs%mat_Z_lP, fm_Sigma_c_Gamma_time)
     159              : 
     160              :        !!========================================================================
     161              :        !! 8. Compute Quasiparticle Energies
     162              :        !!    Σ^c_λσ(iτ,k=0) -> Σ^c_nn(ϵ,k)
     163              :        !!    ϵ_nk^GW = ϵ_nk^DFT + Σ^c_nn(ϵ,k) + Σ^x_nn(k) - v^xc_nn(k)
     164              :        !!========================================================================
     165           10 :       CALL compute_QP_energies(bs_env, qs_env, fm_Sigma_x_Gamma, fm_Sigma_c_Gamma_time)
     166              : 
     167           10 :       CALL de_init_bs_env(bs_env)
     168              : 
     169           10 :       CALL timestop(handle)
     170              : 
     171           10 :    END SUBROUTINE gw_calc_large_cell_Gamma_ri_rs
     172              : 
     173              : ! **************************************************************************************************
     174              : !> \brief Compute grid points for RI-RS
     175              : !>        Right now based on Ivan and Xavier implementation
     176              : !>        JCP 150, 174120 (2019), JCTC 17, 2383 (2021)
     177              : !> \param qs_env ...
     178              : !> \param bs_env ...
     179              : !> \param ri_rs_grid_points ...
     180              : ! **************************************************************************************************
     181              : 
     182           10 :    SUBROUTINE ri_rs_grid_assembler(qs_env, bs_env, ri_rs_grid_points)
     183              : 
     184              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     185              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     186              :       REAL(KIND=dp), ALLOCATABLE, INTENT(OUT)            :: ri_rs_grid_points(:, :)
     187              : 
     188              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'ri_rs_grid_assembler'
     189              : 
     190              :       INTEGER                                            :: atom_idx, end_idx, handle, ikind, j, &
     191              :                                                             natom, nkind, start_idx, &
     192              :                                                             total_grid_npts
     193           10 :       INTEGER, ALLOCATABLE                               :: atom_to_kind(:), ri_rs_grid_offsets(:)
     194              :       REAL(KIND=dp)                                      :: atomic_center(3)
     195           10 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     196           10 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     197              : 
     198           10 :       CALL timeset(routineN, handle)
     199              : 
     200              :        !! Get the information about the atoms in the system
     201           10 :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, particle_set=particle_set)
     202              : 
     203           10 :       nkind = SIZE(atomic_kind_set)
     204           10 :       natom = SIZE(particle_set)
     205              : 
     206              :        !! 1. Build the grid cache
     207           10 :       CALL build_grid_cache(bs_env, atomic_kind_set)
     208              : 
     209              :        !! 2. Calculate grid counts and offsets
     210           30 :       ALLOCATE (ri_rs_grid_offsets(natom + 1))
     211           30 :       ALLOCATE (atom_to_kind(natom))
     212              : 
     213           10 :       total_grid_npts = 0
     214           28 :       DO ikind = 1, nkind
     215           56 :          DO j = 1, SIZE(atomic_kind_set(ikind)%atom_list)
     216           28 :             atom_idx = atomic_kind_set(ikind)%atom_list(j)
     217           28 :             atom_to_kind(atom_idx) = ikind
     218           28 :             ri_rs_grid_offsets(atom_idx) = total_grid_npts + 1
     219           46 :             total_grid_npts = total_grid_npts + bs_env%ri_rs%grid_cache(ikind)%npts
     220              :          END DO
     221              :       END DO
     222              : 
     223           10 :       ri_rs_grid_offsets(natom + 1) = total_grid_npts + 1
     224              : 
     225           10 :       IF (bs_env%unit_nr > 0) THEN
     226              :          WRITE (bs_env%unit_nr, FMT="(T2,A,T69,I12)") &
     227            5 :             'Total grid points used for RI-RS:', total_grid_npts
     228            5 :          WRITE (bs_env%unit_nr, "(A)") ' '
     229              :       END IF
     230              : 
     231              :        !! 3. Allocate the global ri_rs_grid arrays
     232           30 :       ALLOCATE (ri_rs_grid_points(3, total_grid_npts))
     233              : 
     234              :        !! 4. Parallelize the grid generation loop
     235              :       !$OMP PARALLEL DO DEFAULT(NONE) &
     236              :       !$OMP SHARED(ri_rs_grid_points, ri_rs_grid_offsets, atom_to_kind, &
     237              :       !$OMP        particle_set, bs_env, natom) &
     238              :       !$OMP PRIVATE(atom_idx, ikind, atomic_center, start_idx, end_idx) &
     239           10 :       !$OMP SCHEDULE(DYNAMIC, 1)
     240              :       DO atom_idx = 1, natom
     241              :          ikind = atom_to_kind(atom_idx)
     242              :          atomic_center(:) = particle_set(atom_idx)%r(:)
     243              : 
     244              :          start_idx = ri_rs_grid_offsets(atom_idx)
     245              :          ! offsets are filled in kind-grouped order, so offsets(atom_idx+1) belongs to an
     246              :          ! unrelated atom when elements are interleaved in the geometry; span from own kind
     247              :          end_idx = start_idx + bs_env%ri_rs%grid_cache(ikind)%npts - 1
     248              : 
     249              :           !! Shift the cached origin grid by the atom's center
     250              :          ri_rs_grid_points(1, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(1, :) + atomic_center(1)
     251              :          ri_rs_grid_points(2, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(2, :) + atomic_center(2)
     252              :          ri_rs_grid_points(3, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(3, :) + atomic_center(3)
     253              : 
     254              :       END DO
     255              :       !$OMP END PARALLEL DO
     256              : 
     257              :        !! 5. Cleanup memory
     258           10 :       IF (ALLOCATED(bs_env%ri_rs%grid_cache)) THEN
     259           28 :          DO ikind = 1, nkind
     260           28 :             IF (ALLOCATED(bs_env%ri_rs%grid_cache(ikind)%raw_points)) DEALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points)
     261              :          END DO
     262           28 :          DEALLOCATE (bs_env%ri_rs%grid_cache)
     263              :       END IF
     264              : 
     265           10 :       DEALLOCATE (atom_to_kind, ri_rs_grid_offsets)
     266           10 :       CALL timestop(handle)
     267              : 
     268           20 :    END SUBROUTINE ri_rs_grid_assembler
     269              : 
     270              : ! **************************************************************************************************
     271              : !> \brief Reads grids from .ion files and stores them in memory based on grid_select
     272              : !> \param bs_env ...
     273              : !> \param atomic_kind_set ...
     274              : ! **************************************************************************************************
     275              : 
     276           10 :    SUBROUTINE build_grid_cache(bs_env, atomic_kind_set)
     277              : 
     278              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     279              :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     280              : 
     281              :       CHARACTER(LEN=256)                                 :: atom_sym, filename, full_path, line, &
     282              :                                                             suffix
     283              :       INTEGER                                            :: colon_idx, i, ierr, ikind, iunit, nkind
     284              :       REAL(KIND=dp)                                      :: pt(3)
     285              : 
     286              :       ! Determine the file suffix based on the user's choice
     287           10 :       IF (bs_env%ri_rs%grid_select == 1) THEN
     288           10 :          suffix = "_def2-tzvp-rs.ion"
     289            0 :       ELSE IF (bs_env%ri_rs%grid_select == 2) THEN
     290            0 :          suffix = "_cc-pvtz-rs.ion"
     291              :       ELSE
     292            0 :          CPABORT("Unknown grid_select value. Valid options are 1 (def2-TZVPP) or 2 (cc-pVTZ).")
     293              :       END IF
     294              : 
     295           10 :       nkind = SIZE(atomic_kind_set)
     296           48 :       IF (.NOT. ALLOCATED(bs_env%ri_rs%grid_cache)) ALLOCATE (bs_env%ri_rs%grid_cache(nkind))
     297              : 
     298           28 :       DO ikind = 1, nkind
     299           18 :          atom_sym = TRIM(atomic_kind_set(ikind)%element_symbol)
     300           18 :          filename = TRIM(atom_sym)//TRIM(suffix)
     301              : 
     302           18 :          full_path = "ri_rs_grid/"//TRIM(filename)
     303              : 
     304              :          CALL open_file(file_name=TRIM(full_path), unit_number=iunit, &
     305           18 :                         file_action="READ", file_status="OLD")
     306              : 
     307              :          ! Parse the preamble for 'n points'
     308           18 :          bs_env%ri_rs%grid_cache(ikind)%npts = 0
     309              :          DO
     310          612 :             READ (iunit, '(A)', IOSTAT=ierr) line
     311          612 :             IF (ierr /= 0) EXIT
     312          612 :             IF (INDEX(line, 'n points') > 0) THEN
     313           18 :                colon_idx = INDEX(line, ':')
     314           18 :                READ (line(colon_idx + 1:), *) bs_env%ri_rs%grid_cache(ikind)%npts
     315           18 :                EXIT
     316              :             END IF
     317              :          END DO
     318              : 
     319              :          ! Allocate the cache array for this specific element
     320           54 :          ALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points(3, bs_env%ri_rs%grid_cache(ikind)%npts))
     321              : 
     322              :          ! Fast-forward to the <grid_points> tag
     323           18 :          REWIND (iunit)
     324              :          DO
     325          648 :             READ (iunit, '(A)', IOSTAT=ierr) line
     326          648 :             IF (ierr /= 0) EXIT
     327          648 :             IF (INDEX(line, '<grid_points>') > 0) EXIT
     328              :          END DO
     329              : 
     330              :          ! Read the raw grid coordinates
     331         4466 :          DO i = 1, bs_env%ri_rs%grid_cache(ikind)%npts
     332         4448 :             READ (iunit, *, IOSTAT=ierr) pt(1), pt(2), pt(3)
     333         4448 :             IF (ierr /= 0) THEN
     334            0 :                CPABORT("Unexpected EOF in grid file ")
     335              :             END IF
     336        17810 :             bs_env%ri_rs%grid_cache(ikind)%raw_points(:, i) = pt(:)
     337              :          END DO
     338              : 
     339           28 :          CALL close_file(unit_number=iunit)
     340              :       END DO
     341              : 
     342           10 :    END SUBROUTINE build_grid_cache
     343              : 
     344              : ! **************************************************************************************************
     345              : !> \brief Evaluates atomic basis functions on a real-space grid and builds a sparse DBCSR matrix.
     346              : !> \param qs_env ...
     347              : !> \param bs_env ...
     348              : !> \param ri_rs_grid_points ...
     349              : !> \param mat_phi_mu_l ...
     350              : ! **************************************************************************************************
     351              : 
     352           10 :    SUBROUTINE atomic_basis_at_grid_point(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l)
     353              : 
     354              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     355              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     356              :       REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT)          :: ri_rs_grid_points(:, :)
     357              :       TYPE(dbcsr_type), INTENT(OUT)                      :: mat_phi_mu_l
     358              : 
     359              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'atomic_basis_at_grid_point'
     360              : 
     361              :       INTEGER :: c_size, chunk_size, dimen_ORB, handle, i, i_blk, iatom, natom, npcol, nprow, &
     362              :          num_grid_chunks, r_end, r_start, total_grid_npts
     363              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf
     364           10 :       INTEGER, DIMENSION(:), POINTER                     :: c_blk_sizes, col_dist, col_dist_ks, &
     365           10 :                                                             r_blk_sizes, row_dist, row_dist_ks
     366           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: atom_col_buffer
     367           10 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     368              :       TYPE(cell_type), POINTER                           :: cell
     369              :       TYPE(dbcsr_distribution_type)                      :: dist
     370              :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist_ks
     371              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     372           10 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     373           10 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     374              : 
     375           10 :       CALL timeset(routineN, handle)
     376              : 
     377              :       ! Setup Grid Blocking
     378              :       ! Right now using 256
     379           10 :       chunk_size = bs_env%ri_rs%chunk_size_dbcsr
     380              : 
     381              :       ! Extract environment variables
     382              :       CALL get_qs_env(qs_env, cell=cell, atomic_kind_set=atomic_kind_set, &
     383              :                       qs_kind_set=qs_kind_set, particle_set=particle_set, &
     384           10 :                       para_env=para_env)
     385              : 
     386           10 :       natom = SIZE(particle_set)
     387           10 :       total_grid_npts = SIZE(ri_rs_grid_points, 2)
     388              : 
     389              :       ! Map the starting indices of spherical gaussian functions (SGF) for each atom
     390           30 :       ALLOCATE (first_sgf(natom + 1))
     391           10 :       CALL get_basis_offsets(particle_set, qs_kind_set, first_sgf, dimen_ORB)
     392              : 
     393              :       ! =========================================================================
     394              :       ! 1. SETUP DBCSR MATRIX TOPOLOGY
     395              :       ! =========================================================================
     396              : 
     397              :       ! A. Define Column Block Sizes (1 Block = 1 Atom's full basis set)
     398           30 :       ALLOCATE (c_blk_sizes(natom))
     399           38 :       DO iatom = 1, natom
     400           38 :          c_blk_sizes(iatom) = first_sgf(iatom + 1) - first_sgf(iatom)
     401              :       END DO
     402              : 
     403              :       ! B. Define Row Block Sizes (Grid chunks of max size 256)
     404           10 :       num_grid_chunks = CEILING(REAL(total_grid_npts, KIND=dp)/REAL(chunk_size, KIND=dp))
     405           30 :       ALLOCATE (r_blk_sizes(num_grid_chunks))
     406           40 :       r_blk_sizes = chunk_size
     407           10 :       IF (MOD(total_grid_npts, chunk_size) /= 0) THEN
     408           10 :          r_blk_sizes(num_grid_chunks) = MOD(total_grid_npts, chunk_size)
     409              :       END IF
     410              : 
     411              :       ! C. Fetch CP2K's Default Process Grid Configuration
     412           10 :       CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist_ks)
     413           10 :       CALL dbcsr_distribution_get(dbcsr_dist_ks, row_dist=row_dist_ks, col_dist=col_dist_ks)
     414              : 
     415              :       ! D. Build Custom Mappings using Round-Robin across the 2D process grid
     416              :       !    (MAXVAL + 1 accounts for 0-based process indexing in DBCSR)
     417           38 :       nprow = MAXVAL(row_dist_ks) + 1
     418           38 :       npcol = MAXVAL(col_dist_ks) + 1
     419              : 
     420           20 :       ALLOCATE (row_dist(num_grid_chunks))
     421           40 :       DO i = 1, num_grid_chunks
     422           40 :          row_dist(i) = MOD(i - 1, nprow)
     423              :       END DO
     424              : 
     425           20 :       ALLOCATE (col_dist(natom))
     426           38 :       DO i = 1, natom
     427           38 :          col_dist(i) = MOD(i - 1, npcol)
     428              :       END DO
     429              : 
     430              :       ! E. Create the DBCSR Distribution and Initialize the Matrix
     431              :       CALL dbcsr_distribution_new(dist, template=dbcsr_dist_ks, &
     432           10 :                                   row_dist=row_dist, col_dist=col_dist)
     433              : 
     434              :       CALL dbcsr_create(mat_phi_mu_l, name="phi_val_sparse", dist=dist, &
     435              :                         matrix_type=dbcsr_type_no_symmetry, &
     436           10 :                         row_blk_size=r_blk_sizes, col_blk_size=c_blk_sizes)
     437              : 
     438              :       ! =========================================================================
     439              :       ! 2. STREAM DATA DIRECTLY INTO SPARSE MATRIX
     440              :       ! =========================================================================
     441              :       ! Iterate over the atoms assigned to this specific MPI rank
     442           10 :       DO iatom = para_env%mepos + 1, natom, para_env%num_pe
     443              : 
     444           14 :          c_size = c_blk_sizes(iatom)
     445              : 
     446              :          ! Allocate a temporary dense buffer just for this specific atom
     447           56 :          ALLOCATE (atom_col_buffer(total_grid_npts, c_size))
     448           14 :          atom_col_buffer = 0.0_dp
     449              : 
     450              :          ! Evaluate the basis functions on the grid
     451              :          CALL fill_phi_for_atom(atom_col_buffer, ri_rs_grid_points, total_grid_npts, &
     452           14 :                                 iatom, particle_set, qs_kind_set, cell)
     453              : 
     454              :          ! Slice the dense column into chunks and insert into DBCSR
     455           56 :          DO i_blk = 1, num_grid_chunks
     456           42 :             r_start = (i_blk - 1)*chunk_size + 1
     457           42 :             r_end = MIN(i_blk*chunk_size, total_grid_npts)
     458              : 
     459              :             ! Apply dynamic sparsity filtering: Only store blocks with physical significance
     460        23914 :             IF (MAXVAL(ABS(atom_col_buffer(r_start:r_end, 1:c_size))) > bs_env%eps_filter) THEN
     461              :                CALL dbcsr_put_block(mat_phi_mu_l, row=i_blk, col=iatom, &
     462           42 :                                     block=atom_col_buffer(r_start:r_end, 1:c_size))
     463              :             END IF
     464              :          END DO
     465              : 
     466           14 :          DEALLOCATE (atom_col_buffer)
     467              : 
     468              :       END DO
     469              : 
     470              :       ! Finalize triggers internal MPI communication to route blocks to their correct 2D process owners
     471           10 :       CALL dbcsr_finalize(mat_phi_mu_l)
     472              : 
     473              :       ! -------------------------------------------------------------------------
     474              :       ! CLEANUP
     475              :       ! -------------------------------------------------------------------------
     476           10 :       DEALLOCATE (first_sgf, r_blk_sizes, c_blk_sizes, row_dist, col_dist)
     477           10 :       CALL dbcsr_distribution_release(dist)
     478              : 
     479           10 :       CALL timestop(handle)
     480              : 
     481           40 :    END SUBROUTINE atomic_basis_at_grid_point
     482              : 
     483              : ! **************************************************************************************************
     484              : !> \brief Compute value of all basis function for single atom across all grid points
     485              : !> \param phi_val ...
     486              : !> \param ri_rs_grid ...
     487              : !> \param npts ...
     488              : !> \param iatom ...
     489              : !> \param particle_set ...
     490              : !> \param qs_kind_set ...
     491              : !> \param cell ...
     492              : ! **************************************************************************************************
     493              : 
     494           14 :    SUBROUTINE fill_phi_for_atom(phi_val, ri_rs_grid, npts, iatom, &
     495              :                                 particle_set, qs_kind_set, cell)
     496              : 
     497              :       REAL(KIND=dp), INTENT(INOUT)                       :: phi_val(:, :)
     498              :       INTEGER, INTENT(IN)                                :: npts
     499              :       REAL(KIND=dp), INTENT(IN)                          :: ri_rs_grid(3, npts)
     500              :       INTEGER, INTENT(IN)                                :: iatom
     501              :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     502              :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     503              :       TYPE(cell_type), POINTER                           :: cell
     504              : 
     505              :       INTEGER                                            :: first_sgf, i_pt, ico, iend_co, ikind, &
     506              :                                                             ipgf, iset, isgf, ishell, istart_co, &
     507              :                                                             l, last_sgf, lx, ly, lz, n_cart_total, &
     508              :                                                             row_idx
     509              :       REAL(KIND=dp)                                      :: alpha, dist_vec(3), exp_val, poly, r2, &
     510              :                                                             r_atom(3), weight
     511              :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis_set
     512              : 
     513              :       ! Get Atom Info
     514           14 :       ikind = particle_set(iatom)%atomic_kind%kind_number
     515           14 :       CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type="ORB")
     516           14 :       IF (.NOT. ASSOCIATED(orb_basis_set)) RETURN
     517              : 
     518           56 :       r_atom = particle_set(iatom)%r
     519              : 
     520              :       !$OMP PARALLEL DO DEFAULT(NONE) &
     521              :       !$OMP SHARED(phi_val, ri_rs_grid, npts, orb_basis_set, r_atom, cell, ncoset, indco) &
     522              :       !$OMP PRIVATE(i_pt, dist_vec, r2, iset, n_cart_total, ishell, l, istart_co, iend_co, &
     523              :       !$OMP         first_sgf, last_sgf, ipgf, alpha, exp_val, isgf, ico, row_idx, weight, &
     524              :       !$OMP         lx, ly, lz, poly) &
     525           14 :       !$OMP SCHEDULE(STATIC)
     526              :       DO i_pt = 1, npts
     527              :          dist_vec = pbc(ri_rs_grid(:, i_pt) - r_atom, cell)
     528              :          r2 = DOT_PRODUCT(dist_vec, dist_vec)
     529              : 
     530              :          DO iset = 1, orb_basis_set%nset
     531              :             n_cart_total = ncoset(orb_basis_set%lmax(iset))
     532              : 
     533              :             DO ishell = 1, orb_basis_set%nshell(iset)
     534              :                l = orb_basis_set%l(ishell, iset)
     535              :                istart_co = ncoset(l - 1) + 1
     536              :                iend_co = ncoset(l)
     537              : 
     538              :                first_sgf = orb_basis_set%first_sgf(ishell, iset)
     539              :                last_sgf = orb_basis_set%last_sgf(ishell, iset)
     540              : 
     541              :                DO ipgf = 1, orb_basis_set%npgf(iset)
     542              :                   alpha = orb_basis_set%zet(ipgf, iset)
     543              :                   exp_val = EXP(-alpha*r2)
     544              : 
     545              :                   DO isgf = first_sgf, last_sgf
     546              :                      DO ico = istart_co, iend_co
     547              :                         row_idx = (ipgf - 1)*n_cart_total + ico
     548              :                         weight = orb_basis_set%sphi(row_idx, isgf)
     549              :                         lx = indco(1, ico)
     550              :                         ly = indco(2, ico)
     551              :                         lz = indco(3, ico)
     552              :                         poly = (dist_vec(1)**lx)*(dist_vec(2)**ly)*(dist_vec(3)**lz)
     553              : 
     554              :                         phi_val(i_pt, isgf) = phi_val(i_pt, isgf) + (weight*poly*exp_val)
     555              : 
     556              :                      END DO
     557              :                   END DO
     558              :                END DO
     559              :             END DO
     560              :          END DO
     561              :       END DO
     562              :       !$OMP END PARALLEL DO
     563              : 
     564              :    END SUBROUTINE fill_phi_for_atom
     565              : 
     566              : ! **************************************************************************************************
     567              : !> \brief Helper for OMP threads to fill phi_val column values
     568              : !> \param particle_set ...
     569              : !> \param qs_kind_set ...
     570              : !> \param first_sgf ...
     571              : !> \param total_sgf ...
     572              : ! **************************************************************************************************
     573              : 
     574           10 :    SUBROUTINE get_basis_offsets(particle_set, qs_kind_set, first_sgf, total_sgf)
     575              : 
     576              :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     577              :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     578              :       INTEGER, INTENT(OUT)                               :: first_sgf(:), total_sgf
     579              : 
     580              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'get_basis_offsets'
     581              : 
     582              :       INTEGER                                            :: handle, iatom, ikind, nsgf
     583              : 
     584           10 :       CALL timeset(routineN, handle)
     585              : 
     586           10 :       total_sgf = 0
     587           38 :       DO iatom = 1, SIZE(particle_set)
     588           28 :          first_sgf(iatom) = total_sgf + 1
     589           28 :          ikind = particle_set(iatom)%atomic_kind%kind_number
     590           28 :          CALL get_qs_kind(qs_kind_set(ikind), nsgf=nsgf, basis_type="ORB")
     591           38 :          total_sgf = total_sgf + nsgf
     592              :       END DO
     593           10 :       first_sgf(SIZE(particle_set) + 1) = total_sgf + 1
     594              : 
     595           10 :       CALL timestop(handle)
     596              : 
     597           10 :    END SUBROUTINE get_basis_offsets
     598              : 
     599              : ! **************************************************************************************************
     600              : !> \brief Compute RI-RS Coefficients (Z_lP)
     601              : !> \param qs_env ...
     602              : !> \param bs_env ...
     603              : !> \param ri_rs_grid_points ...
     604              : !> \param mat_phi_mu_l ...
     605              : !> \param mat_Z_lP ...
     606              : ! **************************************************************************************************
     607              : 
     608           10 :    SUBROUTINE compute_coeff_Z_lP(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l, mat_Z_lP)
     609              : 
     610              :       ! Arguments
     611              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     612              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     613              :       REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT)          :: ri_rs_grid_points(:, :)
     614              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l
     615              :       TYPE(dbcsr_type), INTENT(OUT)                      :: mat_Z_lP
     616              : 
     617              :       CHARACTER(LEN=*), PARAMETER :: key = 'PROPERTIES%BANDSTRUCTURE%GW%PRINT%RESTART', &
     618              :          routineN = 'compute_coeff_Z_lP'
     619              : 
     620              :       INTEGER :: atom_P, chunk_size, col, current_chunk_size, global_i, global_j, group_handle, &
     621              :          handle, i, i_blk, info, istart_ao, j, j_ri, l, loc_idx, max_ao_size, max_loc_ri, &
     622              :          n_ao_total, n_grid_total, n_loc_ri, n_local_grid, natom, num_grid_chunks, r_end, r_start, &
     623              :          row
     624           10 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_grid_idx, map_global_to_local, &
     625           10 :                                                             row_offset
     626           10 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist_phi, col_dist_ri, r_blk_sizes, &
     627           10 :                                                             ri_blk_sizes, row_dist_grid
     628              :       REAL(KIND=dp)                                      :: cutoff_ri, dist, t1
     629           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: d_vec_local
     630           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: D_local, d_lp_local, int_2d_local, &
     631           10 :                                                             phi_global, phi_local, rho_pair_local, &
     632           10 :                                                             Z_blk, Z_global_temp
     633           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: int_3c_local
     634              :       REAL(KIND=dp), DIMENSION(3)                        :: pos_P
     635           10 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_ptr
     636              :       TYPE(cp_logger_type), POINTER                      :: logger
     637              :       TYPE(dbcsr_distribution_type)                      :: dist_phi, dist_Z
     638              :       TYPE(dbcsr_iterator_type)                          :: iter
     639              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     640           10 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     641              :       TYPE(section_vals_type), POINTER                   :: input
     642              : 
     643           10 :       CALL timeset(routineN, handle)
     644              : 
     645              :       ! Restrict integration domain to a localized sphere around the atom
     646           10 :       cutoff_ri = bs_env%ri_rs%cutoff_radius_ri_rs
     647              : 
     648           10 :       t1 = m_walltime()
     649              : 
     650           10 :       CALL get_qs_env(qs_env, para_env=para_env, particle_set=particle_set, input=input)
     651              : 
     652           10 :       natom = SIZE(bs_env%i_RI_start_from_atom)
     653           10 :       n_ao_total = bs_env%i_ao_end_from_atom(natom)
     654           10 :       n_grid_total = SIZE(ri_rs_grid_points, 2)
     655              : 
     656              :       ! =========================================================================
     657              :       ! 1. SETUP DBCSR TOPOLOGY & EXACT OFFSETS
     658              :       ! =========================================================================
     659           10 :       CALL dbcsr_get_info(mat_phi_mu_l, row_blk_size=r_blk_sizes, distribution=dist_phi)
     660           10 :       CALL dbcsr_distribution_get(dist_phi, row_dist=row_dist_grid, col_dist=col_dist_phi, group=group_handle)
     661              : 
     662           10 :       chunk_size = r_blk_sizes(1)
     663           10 :       num_grid_chunks = SIZE(r_blk_sizes)
     664              : 
     665           30 :       ALLOCATE (row_offset(num_grid_chunks))
     666           10 :       row_offset(1) = 0
     667           30 :       DO i_blk = 2, num_grid_chunks
     668           30 :          row_offset(i_blk) = row_offset(i_blk - 1) + r_blk_sizes(i_blk - 1)
     669              :       END DO
     670              : 
     671           40 :       ALLOCATE (ri_blk_sizes(natom), col_dist_ri(natom))
     672           38 :       DO atom_P = 1, natom
     673           28 :          ri_blk_sizes(atom_P) = bs_env%i_RI_end_from_atom(atom_P) - bs_env%i_RI_start_from_atom(atom_P) + 1
     674          118 :          col_dist_ri(atom_P) = MOD(atom_P - 1, MAXVAL(col_dist_phi) + 1)
     675              :       END DO
     676              : 
     677           10 :       CALL dbcsr_distribution_new(dist_Z, template=dist_phi, row_dist=row_dist_grid, col_dist=col_dist_ri)
     678              : 
     679           10 :       IF (bs_env%ri_rs%Z_lP_exists) THEN
     680              :          CALL dbcsr_binary_read(filepath=TRIM(bs_env%prefix)//"Z_lP.matrix", &
     681              :                                 distribution=dist_Z, &
     682            2 :                                 matrix_new=mat_Z_lP)
     683            2 :          IF (bs_env%unit_nr > 0) THEN
     684              :             WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
     685            1 :                'Read Z_lP from file ', ' Execution time', m_walltime() - t1, ' s'
     686            1 :             WRITE (bs_env%unit_nr, '(A)') ' '
     687              :          END IF
     688              :       ELSE
     689              : 
     690              :          CALL dbcsr_create(mat_Z_lP, name="mat_Z_lP", dist=dist_Z, &
     691              :                            matrix_type=dbcsr_type_no_symmetry, &
     692            8 :                            row_blk_size=r_blk_sizes, col_blk_size=ri_blk_sizes)
     693              : 
     694            8 :          max_ao_size = 0
     695           30 :          DO j = 1, SIZE(bs_env%i_ao_start_from_atom)
     696           30 :             max_ao_size = MAX(max_ao_size, bs_env%i_ao_end_from_atom(j) - bs_env%i_ao_start_from_atom(j) + 1)
     697              :          END DO
     698           30 :          max_loc_ri = MAXVAL(ri_blk_sizes)
     699              : 
     700              :          ! Global mapping array: 4 bytes * N_grid_total (~4MB per million grid points)
     701           24 :          ALLOCATE (map_global_to_local(n_grid_total))
     702              : 
     703              :          ! =========================================================================
     704              :          ! 2. PRE-COMPUTE: EXTRACT FULL DBCSR TO DENSE GLOBAL ARRAY
     705              :          ! =========================================================================
     706           32 :          ALLOCATE (phi_global(n_grid_total, n_ao_total))
     707            8 :          phi_global = 0.0_dp
     708              : 
     709            8 :          CALL dbcsr_iterator_start(iter, mat_phi_mu_l)
     710           41 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     711           33 :             CALL dbcsr_iterator_next_block(iter, row, col, block_ptr)
     712              : 
     713           33 :             IF (col < 1 .OR. col > natom) CYCLE
     714           33 :             istart_ao = bs_env%i_ao_start_from_atom(col)
     715              : 
     716          134 :             DO j = 1, SIZE(block_ptr, 2)
     717           93 :                global_j = istart_ao + j - 1
     718           93 :                IF (global_j < 1 .OR. global_j > n_ao_total) CYCLE
     719              : 
     720        19614 :                DO i = 1, SIZE(block_ptr, 1)
     721        19488 :                   global_i = row_offset(row) + i
     722        19488 :                   IF (global_i < 1 .OR. global_i > n_grid_total) CYCLE
     723              : 
     724              :                   ! Insert directly into the global dense array
     725        19581 :                   phi_global(global_i, global_j) = block_ptr(i, j)
     726              :                END DO
     727              :             END DO
     728              :          END DO
     729            8 :          CALL dbcsr_iterator_stop(iter)
     730              : 
     731            8 :          CALL para_env%sum(phi_global)
     732              : 
     733              :          ! =========================================================================
     734              :          ! 3. MPI LOOP OVER ATOMS (Fully independent, no MPI barriers inside)
     735              :          ! This distributes the N_atoms among the available MPI processors.
     736              :          ! =========================================================================
     737            8 :          DO atom_P = para_env%mepos + 1, natom, para_env%num_pe
     738              : 
     739           11 :             n_loc_ri = ri_blk_sizes(atom_P)
     740           44 :             pos_P(:) = particle_set(atom_P)%r(:)
     741              : 
     742              :             ! ---------------------------------------------------------------------
     743              :             ! A. Determine Local Grid Domain based on cutoff_ri
     744              :             ! ---------------------------------------------------------------------
     745           11 :             n_local_grid = 0
     746         6827 :             DO l = 1, n_grid_total
     747        27264 :                dist = SQRT(SUM((ri_rs_grid_points(1:3, l) - pos_P(1:3))**2))
     748         6827 :                IF (dist <= cutoff_ri) n_local_grid = n_local_grid + 1
     749              :             END DO
     750              : 
     751           33 :             ALLOCATE (local_grid_idx(n_local_grid))
     752           11 :             map_global_to_local = 0
     753              : 
     754           11 :             n_local_grid = 0
     755         6827 :             DO l = 1, n_grid_total
     756        27264 :                dist = SQRT(SUM((ri_rs_grid_points(1:3, l) - pos_P(1:3))**2))
     757         6827 :                IF (dist <= cutoff_ri) THEN
     758         6816 :                   n_local_grid = n_local_grid + 1
     759         6816 :                   local_grid_idx(n_local_grid) = l
     760         6816 :                   map_global_to_local(l) = n_local_grid
     761              :                END IF
     762              :             END DO
     763              : 
     764              :             ! ---------------------------------------------------------------------
     765              :             ! B. Smart Fetch of Local Phi (MPI-Safe & Bounds-Checked)
     766              :             ! ---------------------------------------------------------------------
     767           44 :             ALLOCATE (phi_local(n_local_grid, n_ao_total))
     768              : 
     769              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     770              :             !$OMP SHARED(n_ao_total, n_local_grid, local_grid_idx, phi_local, phi_global) &
     771              :             !$OMP PRIVATE(global_j, loc_idx, global_i) &
     772           11 :             !$OMP SCHEDULE(STATIC)
     773              :             DO global_j = 1, n_ao_total
     774              :                DO loc_idx = 1, n_local_grid
     775              :                   global_i = local_grid_idx(loc_idx)
     776              :                   phi_local(loc_idx, global_j) = phi_global(global_i, global_j)
     777              :                END DO
     778              :             END DO
     779              :             !$OMP END PARALLEL DO
     780              : 
     781              :             ! ---------------------------------------------------------------------
     782              :             ! C. Build and Balance Local LHS Matrix (D_local)
     783              :             ! ---------------------------------------------------------------------
     784           66 :             ALLOCATE (D_local(n_local_grid, n_local_grid), d_vec_local(n_local_grid))
     785           11 :             D_local = 0.0_dp
     786              : 
     787           11 :             CALL dsyrk("L", "N", n_local_grid, n_ao_total, 1.0_dp, phi_local, n_local_grid, 0.0_dp, D_local, n_local_grid)
     788              : 
     789              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     790              :             !$OMP SHARED(n_local_grid, D_local, d_vec_local, bs_env) &
     791              :             !$OMP PRIVATE(i) &
     792           11 :             !$OMP SCHEDULE(STATIC)
     793              :             DO i = 1, n_local_grid
     794              :                D_local(i, i) = D_local(i, i)**2
     795              :                d_vec_local(i) = 1.0_dp/SQRT(MAX(D_local(i, i), 1.0E-16_dp))
     796              :                D_local(i, i) = (D_local(i, i)*d_vec_local(i)**2) + bs_env%ri_rs%tikhonov
     797              :             END DO
     798              :             !$OMP END PARALLEL DO
     799              : 
     800              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     801              :             !$OMP SHARED(n_local_grid, D_local, d_vec_local) &
     802              :             !$OMP PRIVATE(j, i) &
     803           11 :             !$OMP SCHEDULE(DYNAMIC)
     804              :             DO j = 1, n_local_grid
     805              :                DO i = j + 1, n_local_grid
     806              :                   D_local(i, j) = D_local(i, j)**2
     807              :                   D_local(i, j) = D_local(i, j)*d_vec_local(i)*d_vec_local(j)
     808              :                   D_local(j, i) = D_local(i, j)
     809              :                END DO
     810              :             END DO
     811              :             !$OMP END PARALLEL DO
     812              : 
     813              :             ! ---------------------------------------------------------------------
     814              :             ! D. Build Local RHS Matrix (d_lp_local)
     815              :             ! ---------------------------------------------------------------------
     816           44 :             ALLOCATE (d_lp_local(n_local_grid, n_loc_ri))
     817           44 :             ALLOCATE (rho_pair_local(n_local_grid, max_ao_size*max_ao_size))
     818           55 :             ALLOCATE (int_3c_local(max_ao_size, max_ao_size, n_loc_ri))
     819           44 :             ALLOCATE (int_2d_local(max_ao_size*max_ao_size, n_loc_ri))
     820              : 
     821           11 :             d_lp_local = 0.0_dp
     822              : 
     823              :             CALL compute_d_lp(qs_env, bs_env, phi_local, d_lp_local, n_local_grid, &
     824           11 :                               n_loc_ri, atom_P, rho_pair_local, int_3c_local, int_2d_local, max_ao_size)
     825              : 
     826              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     827              :             !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
     828              :             !$OMP PRIVATE(j_ri, i) &
     829           11 :             !$OMP SCHEDULE(STATIC)
     830              :             DO j_ri = 1, n_loc_ri
     831              :                DO i = 1, n_local_grid
     832              :                   d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
     833              :                END DO
     834              :             END DO
     835              :             !$OMP END PARALLEL DO
     836              : 
     837              :             ! ---------------------------------------------------------------------
     838              :             ! E. Fast Local LAPACK Solve
     839              :             ! ---------------------------------------------------------------------
     840              : 
     841           11 :             CALL dpotrf('L', n_local_grid, D_local, n_local_grid, info)
     842              : 
     843           11 :             CALL dpotrs('L', n_local_grid, n_loc_ri, D_local, n_local_grid, d_lp_local, n_local_grid, info)
     844              : 
     845              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     846              :             !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
     847              :             !$OMP PRIVATE(j_ri, i) &
     848           11 :             !$OMP SCHEDULE(STATIC)
     849              :             DO j_ri = 1, n_loc_ri
     850              :                DO i = 1, n_local_grid
     851              :                   d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
     852              :                END DO
     853              :             END DO
     854              :             !$OMP END PARALLEL DO
     855              : 
     856              :             ! ---------------------------------------------------------------------
     857              :             ! F. Scatter Local Solution Back to Global DBCSR Matrix
     858              :             ! ---------------------------------------------------------------------
     859           44 :             ALLOCATE (Z_global_temp(n_grid_total, n_loc_ri))
     860           11 :             Z_global_temp = 0.0_dp
     861              : 
     862              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     863              :             !$OMP SHARED(n_local_grid, local_grid_idx, Z_global_temp, n_loc_ri, d_lp_local) &
     864              :             !$OMP PRIVATE(loc_idx, global_i) &
     865           11 :             !$OMP SCHEDULE(STATIC)
     866              :             DO loc_idx = 1, n_local_grid
     867              :                global_i = local_grid_idx(loc_idx)
     868              :                Z_global_temp(global_i, 1:n_loc_ri) = d_lp_local(loc_idx, 1:n_loc_ri)
     869              :             END DO
     870              :             !$OMP END PARALLEL DO
     871              : 
     872           77 :             ALLOCATE (Z_blk(MAXVAL(r_blk_sizes), n_loc_ri))
     873              : 
     874           44 :             DO i_blk = 1, num_grid_chunks
     875           33 :                r_start = row_offset(i_blk) + 1
     876           33 :                r_end = row_offset(i_blk) + r_blk_sizes(i_blk)
     877           33 :                current_chunk_size = r_blk_sizes(i_blk)
     878              : 
     879           33 :                Z_blk = 0.0_dp
     880        53376 :                Z_blk(1:current_chunk_size, 1:n_loc_ri) = Z_global_temp(r_start:r_end, 1:n_loc_ri)
     881              : 
     882        53387 :                IF (MAXVAL(ABS(Z_blk(1:current_chunk_size, 1:n_loc_ri))) > bs_env%eps_filter) THEN
     883              :                   CALL dbcsr_put_block(mat_Z_lP, row=i_blk, col=atom_P, &
     884           33 :                                        block=Z_blk(1:current_chunk_size, 1:n_loc_ri))
     885              :                END IF
     886              :             END DO
     887              : 
     888           11 :             DEALLOCATE (Z_global_temp, Z_blk)
     889           11 :             DEALLOCATE (D_local, d_vec_local, d_lp_local)
     890           11 :             DEALLOCATE (rho_pair_local, int_3c_local, int_2d_local)
     891              : 
     892           11 :             DEALLOCATE (local_grid_idx, phi_local)
     893              : 
     894              :          END DO
     895              : 
     896              :          ! Clean up the massive global array once the loop finishes
     897            8 :          DEALLOCATE (phi_global)
     898              : 
     899            8 :          CALL dbcsr_finalize(mat_Z_lP)
     900              : 
     901            8 :          IF (bs_env%unit_nr > 0) THEN
     902              :             WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
     903            4 :                'Computed Z_lP ', ' Execution time', m_walltime() - t1, ' s'
     904            4 :             WRITE (bs_env%unit_nr, '(A)') ' '
     905              :          END IF
     906              : 
     907            8 :          logger => cp_get_default_logger()
     908              : 
     909            8 :          IF (BTEST(cp_print_key_should_output(logger%iter_info, input, key), cp_p_file)) THEN
     910            0 :             CALL dbcsr_binary_write(matrix=mat_Z_lP, filepath=TRIM(bs_env%prefix)//"Z_lP.matrix")
     911              :          END IF
     912              : 
     913           16 :          DEALLOCATE (map_global_to_local)
     914              : 
     915              :       END IF
     916              : 
     917           10 :       DEALLOCATE (row_offset, ri_blk_sizes, col_dist_ri)
     918           10 :       CALL dbcsr_distribution_release(dist_Z)
     919              : 
     920           10 :       DEALLOCATE (ri_rs_grid_points)
     921              : 
     922           10 :       CALL timestop(handle)
     923              : 
     924           40 :    END SUBROUTINE compute_coeff_Z_lP
     925              : 
     926              : ! **************************************************************************************************
     927              : !> \brief Computes the dense localized Right-Hand Side (RHS) matrix d_lp
     928              : !> \param qs_env ...
     929              : !> \param bs_env ...
     930              : !> \param phi_val ...
     931              : !> \param d_lp ...
     932              : !> \param n_grid_total ...
     933              : !> \param n_loc_ri ...
     934              : !> \param atom_P ...
     935              : !> \param rho_pair ...
     936              : !> \param int_3c ...
     937              : !> \param int_2d ...
     938              : !> \param max_ao_size ...
     939              : ! **************************************************************************************************
     940              : 
     941           44 :    SUBROUTINE compute_d_lp(qs_env, bs_env, phi_val, d_lp, n_grid_total, n_loc_ri, atom_P, &
     942           11 :                            rho_pair, int_3c, int_2d, max_ao_size)
     943              : 
     944              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     945              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     946              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: phi_val
     947              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: d_lp
     948              :       INTEGER, INTENT(IN)                                :: n_grid_total, n_loc_ri, atom_P
     949              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: rho_pair
     950              :       REAL(KIND=dp), DIMENSION(:, :, :), INTENT(INOUT)   :: int_3c
     951              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: int_2d
     952              :       INTEGER, INTENT(IN)                                :: max_ao_size
     953              : 
     954              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'compute_d_lp'
     955              : 
     956              :       INTEGER                                            :: atom_j, atom_k, handle, j, jk_idx, &
     957              :                                                             jsize, jstart, k, ksize, kstart, l, ri
     958              : 
     959           11 :       CALL timeset(routineN, handle)
     960              : 
     961           42 :       DO atom_j = 1, SIZE(bs_env%i_ao_start_from_atom)
     962           31 :          jstart = bs_env%i_ao_start_from_atom(atom_j)
     963           31 :          jsize = bs_env%i_ao_end_from_atom(atom_j) - jstart + 1
     964              : 
     965          131 :          DO atom_k = 1, SIZE(bs_env%i_ao_start_from_atom)
     966           89 :             kstart = bs_env%i_ao_start_from_atom(atom_k)
     967           89 :             ksize = bs_env%i_ao_end_from_atom(atom_k) - kstart + 1
     968              : 
     969         7794 :             int_3c(1:jsize, 1:ksize, 1:n_loc_ri) = 0.0_dp
     970              : 
     971              :             ! Compute B_{μν,P} = (μν|P)
     972              :             CALL build_3c_integral_block(int_3c(1:jsize, 1:ksize, 1:n_loc_ri), &
     973              :                                          qs_env, bs_env%ri_metric, &
     974              :                                          basis_j=bs_env%basis_set_AO, &
     975              :                                          basis_k=bs_env%basis_set_AO, &
     976              :                                          basis_i=bs_env%basis_set_RI, &
     977           89 :                                          atom_k=atom_k, atom_j=atom_j, atom_i=atom_P)
     978              : 
     979              :             ! Build Pair Density: ρ(l, μν) = Φ_μ(r_l) \times Φ_ν(r_l)
     980              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     981              :             !$OMP SHARED(ksize, jsize, n_grid_total, rho_pair, phi_val, jstart, kstart) &
     982              :             !$OMP PRIVATE(k, j, l, jk_idx) &
     983           89 :             !$OMP COLLAPSE(2)
     984              :             DO k = 1, ksize
     985              :                DO j = 1, jsize
     986              :                   jk_idx = (k - 1)*jsize + j
     987              :                   DO l = 1, n_grid_total
     988              :                      rho_pair(l, jk_idx) = phi_val(l, jstart + j - 1)*phi_val(l, kstart + k - 1)
     989              :                   END DO
     990              :                END DO
     991              :             END DO
     992              :             !$OMP END PARALLEL DO
     993              : 
     994              :             ! Flatten 3D B_{μν, P} tensor to 2D B_{(μν), P} matrix for BLAS
     995              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     996              :             !$OMP SHARED(n_loc_ri, ksize, jsize, int_2d, int_3c) &
     997              :             !$OMP PRIVATE(ri, k, j, jk_idx) &
     998           89 :             !$OMP COLLAPSE(2)
     999              :             DO ri = 1, n_loc_ri
    1000              :                DO k = 1, ksize
    1001              :                   DO j = 1, jsize
    1002              :                      jk_idx = (k - 1)*jsize + j
    1003              :                      int_2d(jk_idx, ri) = int_3c(j, k, ri)
    1004              :                   END DO
    1005              :                END DO
    1006              :             END DO
    1007              :             !$OMP END PARALLEL DO
    1008              : 
    1009              :             ! Perform Contraction: d_{l, P} += ρ(l, μν) \times B_{(μν), P}
    1010              :             CALL dgemm("N", "N", n_grid_total, n_loc_ri, jsize*ksize, &
    1011              :                        1.0_dp, rho_pair, n_grid_total, &
    1012              :                        int_2d, max_ao_size*max_ao_size, &
    1013          120 :                        1.0_dp, d_lp(1, 1), n_grid_total)
    1014              :          END DO
    1015              :       END DO
    1016              : 
    1017           11 :       CALL timestop(handle)
    1018              : 
    1019           11 :    END SUBROUTINE compute_d_lp
    1020              : 
    1021              : ! **************************************************************************************************
    1022              : !> \brief Computes the χ(iτ, k=0) matrix
    1023              : !> \param bs_env ...
    1024              : !> \param mat_chi_Gamma_tau ...
    1025              : !> \param mat_phi_mu_l ...
    1026              : !> \param mat_Z_lP ...
    1027              : ! **************************************************************************************************
    1028              : 
    1029           10 :    SUBROUTINE get_mat_chi_Gamma_tau(bs_env, mat_chi_Gamma_tau, mat_phi_mu_l, mat_Z_lP)
    1030              : 
    1031              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
    1032              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_chi_Gamma_tau
    1033              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l, mat_Z_lP
    1034              : 
    1035              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_mat_chi_Gamma_tau'
    1036              : 
    1037              :       INTEGER                                            :: handle, i, i_t, ispin, npcol
    1038           10 :       INTEGER, DIMENSION(:), POINTER                     :: blk_ao, blk_grid, dist_col_ao, &
    1039           10 :                                                             dist_col_grid, dist_row_grid
    1040              :       REAL(KIND=dp)                                      :: t1, tau
    1041              :       TYPE(dbcsr_distribution_type)                      :: dist_grid_grid, dist_phi
    1042              :       TYPE(dbcsr_type)                                   :: matrix_chi_grid, matrix_chi_grid_spin, &
    1043              :                                                             matrix_G_occ_grid, matrix_G_vir_grid
    1044              : 
    1045           10 :       CALL timeset(routineN, handle)
    1046              : 
    1047              :       ! =========================================================================
    1048              :       ! 1. SETUP CORE TOPOLOGIES
    1049              :       ! =========================================================================
    1050           10 :       CALL dbcsr_get_info(mat_phi_mu_l, distribution=dist_phi, row_blk_size=blk_grid, col_blk_size=blk_ao)
    1051           10 :       CALL dbcsr_distribution_get(dist_phi, row_dist=dist_row_grid, col_dist=dist_col_ao)
    1052              : 
    1053              :       ! Determine number of MPI process columns
    1054           38 :       npcol = MAXVAL(dist_col_ao) + 1
    1055              : 
    1056              :       ! Build a perfectly safe column distribution for the Grid dimension
    1057           30 :       ALLOCATE (dist_col_grid(SIZE(blk_grid)))
    1058           40 :       DO i = 1, SIZE(blk_grid)
    1059           40 :          dist_col_grid(i) = MOD(i - 1, npcol)
    1060              :       END DO
    1061              : 
    1062              :       CALL dbcsr_distribution_new(dist_grid_grid, template=dist_phi, &
    1063           10 :                                   row_dist=dist_row_grid, col_dist=dist_col_grid)
    1064              : 
    1065           10 :       CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1066           10 :       CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1067           10 :       CALL dbcsr_create(matrix_chi_grid, "chi_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1068           10 :       CALL dbcsr_create(matrix_chi_grid_spin, "chi_grid_spin", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1069              : 
    1070              :       ! =========================================================================
    1071              :       ! 2. MAIN IMAGINARY TIME LOOP
    1072              :       ! =========================================================================
    1073          120 :       DO i_t = 1, bs_env%num_time_freq_points
    1074          110 :          t1 = m_walltime()
    1075              : 
    1076          110 :          tau = bs_env%imag_time_points(i_t)
    1077          110 :          CALL dbcsr_set(matrix_chi_grid, 0.0_dp)
    1078              : 
    1079              :          ! ----------------------------------------------------------------------
    1080              :          ! A. SPIN LOOP (Allocations safely encapsulated in wrappers)
    1081              :          ! ----------------------------------------------------------------------
    1082          240 :          DO ispin = 1, bs_env%n_spin
    1083              : 
    1084              :             ! G^occ_µλ(i|τ|,k=0) = sum_n^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1085              :             ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
    1086              :             CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, &
    1087          130 :                               matrix_G_occ_grid, bs_env%eps_filter)
    1088              : 
    1089              :             ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1090              :             ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
    1091              :             CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, &
    1092          130 :                               matrix_G_vir_grid, bs_env%eps_filter)
    1093              : 
    1094              :             ! -------------------------------------------------------------------
    1095              :             ! B. ELEMENT-WISE HADAMARD PRODUCT
    1096              :             ! -------------------------------------------------------------------
    1097              :             ! χ_ll'(iτ,k=0) = G^occ_ll'(i|τ|,k=0) * G^vir_ll'(i|τ|,k=0)
    1098          130 :             CALL hadamard_product(matrix_G_occ_grid, matrix_G_vir_grid, matrix_chi_grid_spin, bs_env%spin_degeneracy)
    1099              : 
    1100              :             ! Accumulate spin contributions
    1101          240 :             CALL dbcsr_add(matrix_chi_grid, matrix_chi_grid_spin, 1.0_dp, 1.0_dp)
    1102              : 
    1103              :          END DO ! ispin
    1104              : 
    1105              :          ! ----------------------------------------------------------------------
    1106              :          ! C. TRANSFORM TO AUXILIARY BASIS & EXPORT DIRECTLY
    1107              :          ! χ_aux        = Z^T * χ_grid * Z
    1108              :          ! χ_PQ(iτ,k=0) = sum_ll' Z_lP χ_ll'(iτ,k=0) Z_l'Q
    1109              :          ! Result is dumped directly into the final array mat_chi_Gamma_tau!
    1110              :          ! ----------------------------------------------------------------------
    1111              :          CALL contract_A_B_A("T", "N", mat_Z_lP, matrix_chi_grid, &
    1112          110 :                              mat_chi_Gamma_tau(i_t)%matrix, bs_env%eps_filter)
    1113              : 
    1114          120 :          IF (bs_env%unit_nr > 0) THEN
    1115              :             WRITE (bs_env%unit_nr, '(T2,A,I13,A,I3,A,F7.1,A)') &
    1116           55 :                'Computed χ(iτ,k=0) for time point', i_t, ' /', bs_env%num_time_freq_points, &
    1117          110 :                ',    Execution time', m_walltime() - t1, ' s'
    1118              :          END IF
    1119              : 
    1120              :       END DO ! i_t
    1121              : 
    1122              :       ! =========================================================================
    1123              :       ! 3. FINAL CLEANUP
    1124              :       ! =========================================================================
    1125           10 :       CALL dbcsr_release(matrix_G_occ_grid)
    1126           10 :       CALL dbcsr_release(matrix_G_vir_grid)
    1127           10 :       CALL dbcsr_release(matrix_chi_grid)
    1128           10 :       CALL dbcsr_release(matrix_chi_grid_spin)
    1129           10 :       CALL dbcsr_distribution_release(dist_grid_grid)
    1130           10 :       DEALLOCATE (dist_col_grid)
    1131              : 
    1132           10 :       IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
    1133              : 
    1134           10 :       CALL timestop(handle)
    1135              : 
    1136           20 :    END SUBROUTINE get_mat_chi_Gamma_tau
    1137              : 
    1138              : ! **************************************************************************************************
    1139              : !> \brief Computes Green's Function in grid basis
    1140              : !> \param bs_env ...
    1141              : !> \param tau ...
    1142              : !> \param ispin ...
    1143              : !> \param occ ...
    1144              : !> \param vir ...
    1145              : !> \param mat_phi_mu_l ...
    1146              : !> \param matrix_G_grid ...
    1147              : !> \param eps_filter ...
    1148              : ! **************************************************************************************************
    1149              : 
    1150         1064 :    SUBROUTINE build_G_grid(bs_env, tau, ispin, occ, vir, mat_phi_mu_l, matrix_G_grid, eps_filter)
    1151              : 
    1152              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
    1153              :       REAL(KIND=dp), INTENT(IN)                          :: tau
    1154              :       INTEGER, INTENT(IN)                                :: ispin
    1155              :       LOGICAL, INTENT(IN)                                :: occ, vir
    1156              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l, matrix_G_grid
    1157              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    1158              : 
    1159              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'build_G_grid'
    1160              : 
    1161              :       INTEGER                                            :: handle
    1162          532 :       INTEGER, DIMENSION(:), POINTER                     :: blk_ao, dist_row_ao
    1163              :       TYPE(cp_fm_type), POINTER                          :: fm_G
    1164              :       TYPE(dbcsr_distribution_type)                      :: dist_ao_ao
    1165              :       TYPE(dbcsr_type)                                   :: matrix_G_ao
    1166              : 
    1167          532 :       CALL timeset(routineN, handle)
    1168              : 
    1169              :       ! 1. Select the correct FM matrix based on occ/vir flags
    1170          532 :       IF (occ) THEN
    1171          272 :          fm_G => bs_env%fm_Gocc
    1172              :       ELSE
    1173          260 :          fm_G => bs_env%fm_Gvir
    1174              :       END IF
    1175              : 
    1176              :       ! 2. Compute Dense FM Green's Function
    1177              :       ! G^occ/vir_µλ(i|τ|,k=0) = sum_G^occ/vir_µλn^occ/vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1178          532 :       CALL G_occ_vir(bs_env, tau, fm_G, ispin, occ=occ, vir=vir)
    1179              : 
    1180              :       ! 3. Setup AO DBCSR Topology and Create Matrix dynamically
    1181          532 :       CALL setup_square_topology(mat_phi_mu_l, 'COL', dist_ao_ao, blk_ao, dist_row_ao)
    1182              : 
    1183              :       CALL dbcsr_create(matrix_G_ao, name="G_ao", dist=dist_ao_ao, &
    1184              :                         matrix_type=dbcsr_type_no_symmetry, &
    1185          532 :                         row_blk_size=blk_ao, col_blk_size=blk_ao)
    1186              : 
    1187              :       ! 4. Convert FM to Sparse DBCSR
    1188          532 :       CALL copy_fm_to_dbcsr(fm_G, matrix_G_ao, keep_sparsity=.FALSE.)
    1189              : 
    1190              :       ! 5. Transform to Grid Basis: G_grid = phi * G_ao * phi^T
    1191              :       ! G^occ/vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ/vir_µν Φ_ν(r_l')
    1192          532 :       CALL contract_A_B_A("N", "T", mat_phi_mu_l, matrix_G_ao, matrix_G_grid, eps_filter)
    1193              : 
    1194              :       ! 6. Release AO matrix and topology
    1195          532 :       CALL release_dbcsr_topology_and_matrices(dist=dist_ao_ao, mapped_dist=dist_row_ao, m1=matrix_G_ao)
    1196              : 
    1197          532 :       CALL timestop(handle)
    1198              : 
    1199          532 :    END SUBROUTINE build_G_grid
    1200              : 
    1201              : ! **************************************************************************************************
    1202              : !> \brief Generalized routine to compute OUT = A * B * A^T  OR  OUT = A^T * B * A using DBCSR
    1203              : !> \param transA_left ...
    1204              : !> \param transA_right ...
    1205              : !> \param matrix_A ...
    1206              : !> \param matrix_B ...
    1207              : !> \param matrix_out ...
    1208              : !> \param eps_filter ...
    1209              : ! **************************************************************************************************
    1210              : 
    1211         1034 :    SUBROUTINE contract_A_B_A(transA_left, transA_right, matrix_A, matrix_B, matrix_out, eps_filter)
    1212              : 
    1213              :       CHARACTER(LEN=1), INTENT(IN)                       :: transA_left, transA_right
    1214              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_A, matrix_B, matrix_out
    1215              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    1216              : 
    1217              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'contract_A_B_A'
    1218              : 
    1219              :       INTEGER                                            :: handle
    1220              :       TYPE(dbcsr_type)                                   :: matrix_tmp
    1221              : 
    1222         1034 :       CALL timeset(routineN, handle)
    1223              : 
    1224         1034 :       CALL dbcsr_create(matrix_tmp, template=matrix_A)
    1225              : 
    1226         1034 :       IF (transA_left == "N" .AND. transA_right == "T") THEN
    1227              :          ! Path 1: Out = A * B * A^T
    1228              :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_A, matrix_B, &
    1229          652 :                              0.0_dp, matrix_tmp, filter_eps=eps_filter)
    1230              :          CALL dbcsr_multiply("N", "T", 1.0_dp, matrix_tmp, matrix_A, &
    1231          652 :                              0.0_dp, matrix_out, filter_eps=eps_filter)
    1232              : 
    1233          382 :       ELSE IF (transA_left == "T" .AND. transA_right == "N") THEN
    1234              :          ! Path 2: Out = A^T * B * A
    1235              :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_B, matrix_A, &
    1236          382 :                              0.0_dp, matrix_tmp, filter_eps=eps_filter)
    1237              :          CALL dbcsr_multiply("T", "N", 1.0_dp, matrix_A, matrix_tmp, &
    1238          382 :                              0.0_dp, matrix_out, filter_eps=eps_filter)
    1239              :       ELSE
    1240            0 :          CPABORT("Unsupported transposition pair in contract_A_B_A")
    1241              :       END IF
    1242              : 
    1243         1034 :       CALL dbcsr_release(matrix_tmp)
    1244              : 
    1245         1034 :       CALL timestop(handle)
    1246              : 
    1247         1034 :    END SUBROUTINE contract_A_B_A
    1248              : 
    1249              : ! **************************************************************************************************
    1250              : !> \brief Computes C = A ◦ B (Element-wise Hadamard product) for sparse DBCSR matrices.
    1251              : !> \param matrix_A ...
    1252              : !> \param matrix_B ...
    1253              : !> \param matrix_C ...
    1254              : !> \param fac (Scaling factor applied to the product)
    1255              : ! **************************************************************************************************
    1256              : 
    1257          804 :    SUBROUTINE hadamard_product(matrix_A, matrix_B, matrix_C, fac)
    1258              : 
    1259              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_A, matrix_B, matrix_C
    1260              :       REAL(KIND=dp), INTENT(IN)                          :: fac
    1261              : 
    1262              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'hadamard_product'
    1263              : 
    1264              :       INTEGER                                            :: col, handle, row
    1265              :       LOGICAL                                            :: found
    1266          402 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: blk_B, blk_C
    1267              :       TYPE(dbcsr_iterator_type)                          :: iter
    1268              : 
    1269          402 :       CALL timeset(routineN, handle)
    1270              : 
    1271          402 :       CALL dbcsr_copy(matrix_C, matrix_A)
    1272              : 
    1273          402 :       CALL dbcsr_iterator_start(iter, matrix_C)
    1274         2211 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    1275         1809 :          CALL dbcsr_iterator_next_block(iter, row, col, blk_C)
    1276              : 
    1277         1809 :          CALL dbcsr_get_block_p(matrix_B, row, col, blk_B, found)
    1278              : 
    1279         2211 :          IF (found) THEN
    1280    159523682 :             blk_C(:, :) = fac*blk_C(:, :)*blk_B(:, :)
    1281              :          ELSE
    1282              :             ! If B is sparse here, the product is zero
    1283            0 :             blk_C(:, :) = 0.0_dp
    1284              :          END IF
    1285              :       END DO
    1286          402 :       CALL dbcsr_iterator_stop(iter)
    1287              : 
    1288          402 :       CALL timestop(handle)
    1289              : 
    1290          402 :    END SUBROUTINE hadamard_product
    1291              : 
    1292              : ! **************************************************************************************************
    1293              : !> \brief Computes the exact exchange part of the GW self-energy
    1294              : !> \param bs_env ...
    1295              : !> \param qs_env ...
    1296              : !> \param mat_phi_mu_l ...
    1297              : !> \param mat_Z_lP ...
    1298              : !> \param fm_Sigma_x_Gamma ...
    1299              : ! **************************************************************************************************
    1300              : 
    1301           10 :    SUBROUTINE compute_Sigma_x(bs_env, qs_env, mat_phi_mu_l, mat_Z_lP, fm_Sigma_x_Gamma)
    1302              : 
    1303              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
    1304              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1305              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l, mat_Z_lP
    1306              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: fm_Sigma_x_Gamma
    1307              : 
    1308              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'compute_Sigma_x'
    1309              : 
    1310              :       INTEGER                                            :: handle, ispin
    1311           10 :       INTEGER, DIMENSION(:), POINTER                     :: blk_aux, blk_grid, dist_col_grid, &
    1312           10 :                                                             dist_row_aux
    1313              :       REAL(KIND=dp)                                      :: t1
    1314           10 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :)     :: fm_Vtr_Gamma
    1315              :       TYPE(dbcsr_distribution_type)                      :: dist_aux_aux, dist_grid_grid
    1316              :       TYPE(dbcsr_type)                                   :: mat_Sigma_x_Gamma, matrix_D_grid, &
    1317              :                                                             matrix_Sigma_x_grid, matrix_V_aux, &
    1318              :                                                             matrix_V_grid
    1319              : 
    1320           10 :       CALL timeset(routineN, handle)
    1321              : 
    1322           10 :       t1 = m_walltime()
    1323              : 
    1324           42 :       ALLOCATE (fm_Sigma_x_Gamma(bs_env%n_spin))
    1325           22 :       DO ispin = 1, bs_env%n_spin
    1326           22 :          CALL cp_fm_create(fm_Sigma_x_Gamma(ispin), bs_env%fm_s_Gamma%matrix_struct)
    1327              :       END DO
    1328              : 
    1329           10 :       CALL dbcsr_create(mat_Sigma_x_Gamma, template=bs_env%mat_ao_ao%matrix)
    1330              : 
    1331              :       ! =========================================================================
    1332              :       ! 1. SETUP CORE TOPOLOGIES
    1333              :       ! =========================================================================
    1334           10 :       CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
    1335           10 :       CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
    1336              : 
    1337              :       ! =========================================================================
    1338              :       ! 2. COMPUTE V^tr_ll'
    1339              :       ! =========================================================================
    1340              :       CALL RI_2c_integral_mat(qs_env, fm_Vtr_Gamma, bs_env%fm_RI_RI, bs_env%n_RI, &
    1341           10 :                               bs_env%trunc_coulomb, do_kpoints=.FALSE.)
    1342              : 
    1343              :       ! M^-1 * V^tr * M^-1 directly modifies fm_Vtr_Gamma(:, 1)
    1344           10 :       CALL multiply_fm_W_MIC_time_with_Minv_Gamma(bs_env, qs_env, fm_Vtr_Gamma(:, 1))
    1345              : 
    1346           10 :       CALL dbcsr_create(matrix_V_aux, "V_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
    1347           10 :       CALL dbcsr_create(matrix_V_grid, "V_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1348              : 
    1349           10 :       CALL copy_fm_to_dbcsr(fm_Vtr_Gamma(1, 1), matrix_V_aux, keep_sparsity=.FALSE.)
    1350              : 
    1351              :       ! V^tr_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
    1352           10 :       CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_V_aux, matrix_V_grid, bs_env%eps_filter)
    1353           10 :       CALL dbcsr_release(matrix_V_aux)
    1354              : 
    1355              :       ! =========================================================================
    1356              :       ! 3. SPIN LOOP FOR EXACT EXCHANGE
    1357              :       ! =========================================================================
    1358           22 :       DO ispin = 1, bs_env%n_spin
    1359              : 
    1360              :          ! Density matrix on grid is essentially G_occ at tau = 0.0
    1361              :          ! D_µν  = sum_n^occ C_µn(k=0) C_νn(k=0)
    1362              :          ! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
    1363           12 :          CALL dbcsr_create(matrix_D_grid, "D_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1364           12 :          CALL build_G_grid(bs_env, 0.0_dp, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_D_grid, bs_env%eps_filter)
    1365              : 
    1366              :          ! Element-wise Hadamard product: Σ^x_grid = D_grid ◦ V_grid
    1367              :          ! Σ^x_ll' = D_ll' * V^tr_ll'
    1368           12 :          CALL dbcsr_create(matrix_Sigma_x_grid, template=matrix_V_grid)
    1369           12 :          CALL hadamard_product(matrix_D_grid, matrix_V_grid, matrix_Sigma_x_grid, 1.0_dp)
    1370              : 
    1371           12 :          CALL dbcsr_release(matrix_D_grid)
    1372              : 
    1373              :          ! Transform back to AO basis: Σ^x_ao = -1.0 * phi^T * Σ^x_grid * phi
    1374              :          ! Σ^x_λσ(k=0)   = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
    1375           12 :          CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_x_grid, mat_Sigma_x_Gamma, bs_env%eps_filter)
    1376           12 :          CALL dbcsr_scale(mat_Sigma_x_Gamma, -1.0_dp)
    1377              : 
    1378           12 :          CALL dbcsr_release(matrix_Sigma_x_grid)
    1379              : 
    1380              :          ! Data I/O and Export to CP2K Full Matrices
    1381           22 :          CALL copy_dbcsr_to_fm(mat_Sigma_x_Gamma, fm_Sigma_x_Gamma(ispin))
    1382              : 
    1383              :       END DO ! ispin
    1384              : 
    1385           10 :       IF (bs_env%unit_nr > 0) THEN
    1386              :          WRITE (bs_env%unit_nr, '(T2,A,T58,A,F7.1,A)') &
    1387            5 :             'Computed Σ^x(k=0),', ' Execution time', m_walltime() - t1, ' s'
    1388            5 :          WRITE (bs_env%unit_nr, '(A)') ' '
    1389              :       END IF
    1390              : 
    1391              :       ! =========================================================================
    1392              :       ! 4. CLEANUP
    1393              :       ! =========================================================================
    1394              :       CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid, &
    1395           10 :                                                m1=mat_Sigma_x_Gamma, m2=matrix_V_grid)
    1396           10 :       CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
    1397              : 
    1398           10 :       CALL cp_fm_release(fm_Vtr_Gamma)
    1399              : 
    1400           10 :       CALL timestop(handle)
    1401              : 
    1402           30 :    END SUBROUTINE compute_Sigma_x
    1403              : 
    1404              : ! **************************************************************************************************
    1405              : !> \brief Computes the correlation part of the GW self-energy
    1406              : !> \param bs_env ...
    1407              : !> \param fm_W_MIC_time ...
    1408              : !> \param mat_phi_mu_l ...
    1409              : !> \param mat_Z_lP ...
    1410              : !> \param fm_Sigma_c_Gamma_time ...
    1411              : ! **************************************************************************************************
    1412              : 
    1413           10 :    SUBROUTINE compute_Sigma_c(bs_env, fm_W_MIC_time, mat_phi_mu_l, mat_Z_lP, fm_Sigma_c_Gamma_time)
    1414              : 
    1415              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
    1416              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: fm_W_MIC_time
    1417              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l, mat_Z_lP
    1418              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: fm_Sigma_c_Gamma_time
    1419              : 
    1420              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'compute_Sigma_c'
    1421              : 
    1422              :       INTEGER                                            :: handle, i_t, ispin
    1423           10 :       INTEGER, DIMENSION(:), POINTER                     :: blk_aux, blk_grid, dist_col_grid, &
    1424           10 :                                                             dist_row_aux
    1425              :       REAL(KIND=dp)                                      :: t1, tau
    1426              :       TYPE(dbcsr_distribution_type)                      :: dist_aux_aux, dist_grid_grid
    1427           10 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: mat_Sigma_neg_tau, mat_Sigma_pos_tau
    1428              :       TYPE(dbcsr_type) :: matrix_G_occ_grid, matrix_G_vir_grid, matrix_Sigma_neg_grid, &
    1429              :          matrix_Sigma_pos_grid, matrix_W_aux, matrix_W_grid
    1430              : 
    1431           10 :       CALL timeset(routineN, handle)
    1432              : 
    1433              :       ! =========================================================================
    1434              :       ! 1. SETUP CORE TOPOLOGIES AND PRE-ALLOCATE OUTPUT ARRAYS
    1435              :       ! =========================================================================
    1436           10 :       CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
    1437           10 :       CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
    1438              : 
    1439              :       ! Pre-allocate local DBCSR matrices to act as targets for final output
    1440           10 :       NULLIFY (mat_Sigma_neg_tau, mat_Sigma_pos_tau)
    1441          182 :       ALLOCATE (mat_Sigma_neg_tau(bs_env%num_time_freq_points, bs_env%n_spin))
    1442          182 :       ALLOCATE (mat_Sigma_pos_tau(bs_env%num_time_freq_points, bs_env%n_spin))
    1443              : 
    1444          120 :       DO i_t = 1, bs_env%num_time_freq_points
    1445          250 :          DO ispin = 1, bs_env%n_spin
    1446          130 :             ALLOCATE (mat_Sigma_neg_tau(i_t, ispin)%matrix)
    1447          130 :             ALLOCATE (mat_Sigma_pos_tau(i_t, ispin)%matrix)
    1448          130 :             CALL dbcsr_create(mat_Sigma_neg_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
    1449          240 :             CALL dbcsr_create(mat_Sigma_pos_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
    1450              :          END DO
    1451              :       END DO
    1452              : 
    1453              :       ! =========================================================================
    1454              :       ! 2. MAIN IMAGINARY TIME LOOP
    1455              :       ! =========================================================================
    1456          120 :       DO i_t = 1, bs_env%num_time_freq_points
    1457          110 :          tau = bs_env%imag_time_points(i_t)
    1458              : 
    1459              :          ! -------------------------------------------------------------------
    1460              :          ! Compute W_grid = Z * W_aux * Z^T
    1461              :          ! -------------------------------------------------------------------
    1462          110 :          CALL dbcsr_create(matrix_W_aux, "W_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
    1463          110 :          CALL dbcsr_create(matrix_W_grid, "W_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1464              : 
    1465          110 :          CALL copy_fm_to_dbcsr(fm_W_MIC_time(i_t), matrix_W_aux, keep_sparsity=.FALSE.)
    1466              : 
    1467              :          ! W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
    1468          110 :          CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_W_aux, matrix_W_grid, bs_env%eps_filter)
    1469              : 
    1470          110 :          CALL dbcsr_release(matrix_W_aux) ! Clean up aux basis immediately
    1471              : 
    1472          240 :          DO ispin = 1, bs_env%n_spin
    1473          130 :             t1 = m_walltime()
    1474              : 
    1475              :             ! -------------------------------------------------------------------
    1476              :             ! A. Transform Green's Functions to the Grid
    1477              :             ! -------------------------------------------------------------------
    1478          130 :             CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1479          130 :             CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1480              : 
    1481              :             ! G^occ_µλ(i|τ|,k=0) = sum_G^occ_µλn^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1482              :             ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
    1483          130 :             CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_G_occ_grid, bs_env%eps_filter)
    1484              : 
    1485              :             ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1486              :             ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
    1487          130 :             CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, matrix_G_vir_grid, bs_env%eps_filter)
    1488              : 
    1489              :             ! -------------------------------------------------------------------
    1490              :             ! B. Element-wise Hadamard Products for Sigma_c on Grid
    1491              :             ! Σ_neg_grid = G_occ_grid ◦ W_grid
    1492              :             ! Σ_pos_grid = G_vir_grid ◦ W_grid
    1493              :             ! -------------------------------------------------------------------
    1494          130 :             CALL dbcsr_create(matrix_Sigma_neg_grid, template=matrix_W_grid)
    1495          130 :             CALL dbcsr_create(matrix_Sigma_pos_grid, template=matrix_W_grid)
    1496              : 
    1497              :             ! Σ^c_ll'(iτ,k=0)   = -G^occ_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ < 0
    1498          130 :             CALL hadamard_product(matrix_G_occ_grid, matrix_W_grid, matrix_Sigma_neg_grid, 1.0_dp)
    1499              : 
    1500              :             ! Σ^c_ll'(iτ,k=0)   =  G^vir_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ > 0
    1501          130 :             CALL hadamard_product(matrix_G_vir_grid, matrix_W_grid, matrix_Sigma_pos_grid, 1.0_dp)
    1502              : 
    1503              :             ! Instantly purge massive G_grid arrays to save memory
    1504          130 :             CALL dbcsr_release(matrix_G_occ_grid)
    1505          130 :             CALL dbcsr_release(matrix_G_vir_grid)
    1506              : 
    1507              :             ! -------------------------------------------------------------------
    1508              :             ! C. Transform Sigma back to AO Basis
    1509              :             ! Σ_AO = phi^T * Σ_grid * phi
    1510              :             ! -------------------------------------------------------------------
    1511              : 
    1512              :             ! Σ^c_λσ(iτ,k=0)    = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ < 0
    1513              :             CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_neg_grid, &
    1514          130 :                                 mat_Sigma_neg_tau(i_t, ispin)%matrix, bs_env%eps_filter)
    1515          130 :             CALL dbcsr_scale(mat_Sigma_neg_tau(i_t, ispin)%matrix, -1.0_dp)
    1516              : 
    1517              :             ! Σ^c_λσ(iτ,k=0)    = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ > 0
    1518              :             CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_pos_grid, &
    1519          130 :                                 mat_Sigma_pos_tau(i_t, ispin)%matrix, bs_env%eps_filter)
    1520              : 
    1521              :             ! Purge Grid Sigma arrays
    1522          130 :             CALL dbcsr_release(matrix_Sigma_neg_grid)
    1523          130 :             CALL dbcsr_release(matrix_Sigma_pos_grid)
    1524              : 
    1525          240 :             IF (bs_env%unit_nr > 0) THEN
    1526              :                WRITE (bs_env%unit_nr, '(T2,A,I10,A,I3,A,F7.1,A)') &
    1527           65 :                   'Computed Σ^c(iτ,k=0) for time point ', i_t, ' /', bs_env%num_time_freq_points, &
    1528          130 :                   ',    Execution time', m_walltime() - t1, ' s'
    1529              :             END IF
    1530              : 
    1531              :          END DO ! ispin
    1532              : 
    1533              :          ! Release the W_grid for this time point
    1534          120 :          CALL dbcsr_release(matrix_W_grid)
    1535              : 
    1536              :       END DO ! i_t
    1537              : 
    1538           10 :       IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
    1539              : 
    1540              :       ! -------------------------------------------------------------------------
    1541              :       ! 3. FINALIZE AND CLEANUP
    1542              :       ! -------------------------------------------------------------------------
    1543              :       CALL fill_fm_Sigma_c_Gamma_time(fm_Sigma_c_Gamma_time, bs_env, &
    1544           10 :                                       mat_Sigma_pos_tau, mat_Sigma_neg_tau)
    1545              : 
    1546           10 :       CALL cp_fm_release(fm_W_MIC_time)
    1547              : 
    1548           10 :       CALL dbcsr_deallocate_matrix_set(mat_Sigma_neg_tau)
    1549           10 :       CALL dbcsr_deallocate_matrix_set(mat_Sigma_pos_tau)
    1550              : 
    1551           10 :       CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid)
    1552           10 :       CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
    1553              : 
    1554           10 :       CALL delete_unnecessary_files(bs_env)
    1555           10 :       CALL timestop(handle)
    1556              : 
    1557           10 :    END SUBROUTINE compute_Sigma_c
    1558              : 
    1559              : ! **************************************************************************************************
    1560              : !> \brief DBCSR Topology Generation
    1561              : !> \param matrix_template ...
    1562              : !> \param dim_type ...
    1563              : !> \param square_dist ...
    1564              : !> \param blk_sizes ...
    1565              : !> \param mapped_dist ...
    1566              : ! **************************************************************************************************
    1567              : 
    1568          572 :    SUBROUTINE setup_square_topology(matrix_template, dim_type, square_dist, blk_sizes, mapped_dist)
    1569              : 
    1570              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_template
    1571              :       CHARACTER(LEN=*), INTENT(IN)                       :: dim_type
    1572              :       TYPE(dbcsr_distribution_type), INTENT(OUT)         :: square_dist
    1573              :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: blk_sizes, mapped_dist
    1574              : 
    1575              :       INTEGER                                            :: i, np
    1576          572 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk, col_dist, row_blk, row_dist
    1577              :       TYPE(dbcsr_distribution_type)                      :: dist_template
    1578              : 
    1579              :       CALL dbcsr_get_info(matrix_template, distribution=dist_template, &
    1580          572 :                           row_blk_size=row_blk, col_blk_size=col_blk)
    1581          572 :       CALL dbcsr_distribution_get(dist_template, row_dist=row_dist, col_dist=col_dist)
    1582              : 
    1583          572 :       IF (TRIM(dim_type) == 'ROW') THEN
    1584              :          ! Creates ROW x ROW (e.g., Grid x Grid from mat_phi_mu_l)
    1585           20 :          blk_sizes => row_blk
    1586           76 :          np = MAXVAL(col_dist) + 1  ! npcol
    1587           60 :          ALLOCATE (mapped_dist(SIZE(blk_sizes)))
    1588           80 :          DO i = 1, SIZE(blk_sizes)
    1589           80 :             mapped_dist(i) = MOD(i - 1, np)
    1590              :          END DO
    1591              :          CALL dbcsr_distribution_new(square_dist, template=dist_template, &
    1592           20 :                                      row_dist=row_dist, col_dist=mapped_dist)
    1593              : 
    1594          552 :       ELSE IF (TRIM(dim_type) == 'COL') THEN
    1595              :          ! Creates COL x COL (e.g., Aux x Aux from mat_Z_lP)
    1596          552 :          blk_sizes => col_blk
    1597         2208 :          np = MAXVAL(row_dist) + 1  ! nprow
    1598         1656 :          ALLOCATE (mapped_dist(SIZE(blk_sizes)))
    1599         2040 :          DO i = 1, SIZE(blk_sizes)
    1600         2040 :             mapped_dist(i) = MOD(i - 1, np)
    1601              :          END DO
    1602              :          CALL dbcsr_distribution_new(square_dist, template=dist_template, &
    1603          552 :                                      row_dist=mapped_dist, col_dist=col_dist)
    1604              :       END IF
    1605              : 
    1606          572 :    END SUBROUTINE setup_square_topology
    1607              : 
    1608              : ! **************************************************************************************************
    1609              : !> \brief DBCSR matrices deallocation
    1610              : !> \param dist ...
    1611              : !> \param mapped_dist    ...
    1612              : !> \param m1 ...
    1613              : !> \param m2 ...
    1614              : !> \param m3 ...
    1615              : !> \param m4 ...
    1616              : ! **************************************************************************************************
    1617              : 
    1618          572 :    SUBROUTINE release_dbcsr_topology_and_matrices(dist, mapped_dist, m1, m2, m3, m4)
    1619              : 
    1620              :       TYPE(dbcsr_distribution_type), INTENT(INOUT), &
    1621              :          OPTIONAL                                        :: dist
    1622              :       INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL, &
    1623              :          POINTER                                         :: mapped_dist
    1624              :       TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL          :: m1, m2, m3, m4
    1625              : 
    1626          572 :       IF (PRESENT(dist)) CALL dbcsr_distribution_release(dist)
    1627          572 :       IF (PRESENT(mapped_dist)) THEN
    1628          572 :          IF (ASSOCIATED(mapped_dist)) THEN
    1629          572 :             DEALLOCATE (mapped_dist)
    1630              :             NULLIFY (mapped_dist)
    1631              :          END IF
    1632              :       END IF
    1633          572 :       IF (PRESENT(m1)) CALL dbcsr_release(m1)
    1634          572 :       IF (PRESENT(m2)) CALL dbcsr_release(m2)
    1635          572 :       IF (PRESENT(m3)) CALL dbcsr_release(m3)
    1636          572 :       IF (PRESENT(m4)) CALL dbcsr_release(m4)
    1637              : 
    1638          572 :    END SUBROUTINE release_dbcsr_topology_and_matrices
    1639              : 
    1640              : END MODULE gw_large_cell_Gamma_ri_rs
        

Generated by: LCOV version 2.0-1