LCOV - code coverage report
Current view: top level - src/fm - cp_fm_diag.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:cccd2f3) Lines: 82.0 % 527 432
Test Date: 2026-05-06 07:07:47 Functions: 94.1 % 17 16

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief used for collecting some of the diagonalization schemes available for
      10              : !>      cp_fm_type. cp_fm_power also moved here as it is very related
      11              : !> \note
      12              : !>      first version : most routines imported
      13              : !> \par History
      14              : !>      - unused Jacobi routines removed, cosmetics (05.04.06,MK)
      15              : !> \author Joost VandeVondele (2003-08)
      16              : ! **************************************************************************************************
      17              : MODULE cp_fm_diag
      18              :    USE cp_blacs_types, ONLY: cp_blacs_type
      19              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      20              :    USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale, &
      21              :                                  cp_fm_gemm, &
      22              :                                  cp_fm_scale, &
      23              :                                  cp_fm_syrk, &
      24              :                                  cp_fm_triangular_invert, &
      25              :                                  cp_fm_triangular_multiply, &
      26              :                                  cp_fm_uplo_to_full
      27              :    USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
      28              :    USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_end, &
      29              :                                cp_fm_redistribute_start
      30              :    USE cp_fm_elpa, ONLY: cp_fm_diag_elpa, &
      31              :                          finalize_elpa_library, &
      32              :                          initialize_elpa_library, &
      33              :                          set_elpa_kernel
      34              :    USE cp_fm_cusolver_api, ONLY: cp_fm_diag_cusolver, &
      35              :                                  cp_fm_general_cusolver
      36              : #if defined(__DLAF)
      37              :    USE cp_fm_dlaf_api, ONLY: cp_fm_diag_dlaf, cp_fm_diag_gen_dlaf
      38              :    USE cp_dlaf_utils_api, ONLY: cp_dlaf_initialize, cp_dlaf_finalize
      39              : #endif
      40              :    USE cp_fm_types, ONLY: cp_fm_get_info, &
      41              :                           cp_fm_set_element, &
      42              :                           cp_fm_to_fm, &
      43              :                           cp_fm_type, &
      44              :                           cp_fm_create, &
      45              :                           cp_fm_get_info, &
      46              :                           cp_fm_release, &
      47              :                           cp_fm_set_all, &
      48              :                           cp_fm_to_fm, &
      49              :                           cp_fm_to_fm_submat, &
      50              :                           cp_fm_type
      51              :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent, &
      52              :                            cp_fm_struct_create, &
      53              :                            cp_fm_struct_release, &
      54              :                            cp_fm_struct_type
      55              :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      56              :                               cp_get_default_logger, &
      57              :                               cp_logger_get_default_io_unit, &
      58              :                               cp_logger_type, &
      59              :                               cp_to_string
      60              :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      61              :                               cp_logger_get_default_unit_nr, &
      62              :                               cp_logger_get_unit_nr, &
      63              :                               cp_logger_type
      64              :    USE kinds, ONLY: default_string_length, &
      65              :                     dp
      66              :    USE machine, ONLY: default_output_unit, &
      67              :                       m_memory
      68              :    USE parallel_gemm_api, ONLY: parallel_gemm
      69              : #if defined (__parallel)
      70              :    USE message_passing, ONLY: mp_comm_type
      71              : #endif
      72              : #if defined (__HAS_IEEE_EXCEPTIONS)
      73              :    USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
      74              :                               ieee_set_halting_mode, &
      75              :                               IEEE_ALL
      76              : #endif
      77              : #include "../base/base_uses.f90"
      78              : 
      79              :    IMPLICIT NONE
      80              :    PRIVATE
      81              : 
      82              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag'
      83              : 
      84              :    REAL(KIND=dp), PARAMETER, PUBLIC :: eps_check_diag_default = 5.0E-14_dp
      85              : 
      86              :    ! The following saved variables are diagonalization global
      87              :    ! Stores the default library for diagonalization
      88              :    INTEGER, SAVE, PUBLIC    :: diag_type = 0
      89              :    ! Minimum number of eigenvectors for the use of the ELPA eigensolver.
      90              :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      91              :    INTEGER, SAVE            :: elpa_neigvec_min = 0
      92              :    ! Minimum matrix size for the use of the cuSOLVERMp eigensolver.
      93              :    ! Smaller matrices use the ScaLAPACK fallback to avoid GPU launch overheads.
      94              :    INTEGER, PARAMETER, PUBLIC :: cusolver_n_min = 64
      95              : #if defined(__DLAF)
      96              :    ! Minimum number of eigenvectors for the use of the DLAF eigensolver.
      97              :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      98              :    INTEGER, SAVE, PUBLIC    :: dlaf_neigvec_min = 0
      99              : #endif
     100              :    LOGICAL, SAVE, PUBLIC :: direct_generalized_diagonalization = .FALSE.
     101              :    ! Threshold value for the orthonormality check of the eigenvectors obtained
     102              :    ! after a diagonalization. A negative value disables the check.
     103              :    REAL(KIND=dp), SAVE :: eps_check_diag = -1.0_dp
     104              : 
     105              :    ! Constants for the diag_type above
     106              :    INTEGER, PARAMETER, PUBLIC  :: FM_DIAG_TYPE_SCALAPACK = 101, &
     107              :                                   FM_DIAG_TYPE_ELPA = 102, &
     108              :                                   FM_DIAG_TYPE_CUSOLVER = 103, &
     109              :                                   FM_DIAG_TYPE_DLAF = 104
     110              : #if defined(__CUSOLVERMP)
     111              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_CUSOLVER
     112              : #elif defined(__ELPA)
     113              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_ELPA
     114              : #else
     115              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_SCALAPACK
     116              : #endif
     117              : 
     118              :    ! Public subroutines
     119              :    PUBLIC :: choose_eigv_solver, &
     120              :              cp_fm_block_jacobi, &
     121              :              cp_fm_power, &
     122              :              cp_fm_syevd, &
     123              :              cp_fm_syevx, &
     124              :              cp_fm_svd, &
     125              :              cp_fm_geeig, &
     126              :              cp_fm_geeig_canon, &
     127              :              diag_check_requested, &
     128              :              diag_check_warning_threshold, &
     129              :              diag_init, &
     130              :              diag_finalize
     131              : 
     132              : CONTAINS
     133              : 
     134              : ! **************************************************************************************************
     135              : !> \brief Setup the diagonalization library to be used
     136              : !> \param diag_lib diag_library flag from GLOBAL section in input
     137              : !> \param fallback_applied .TRUE. if support for the requested library was not compiled-in and fallback
     138              : !>                         to ScaLAPACK was applied, .FALSE. otherwise.
     139              : !> \param elpa_kernel integer that determines which ELPA kernel to use for diagonalization
     140              : !> \param elpa_neigvec_min_input ...
     141              : !> \param elpa_qr logical that determines if ELPA should try to use QR to accelerate the
     142              : !>                diagonalization procedure of suitably sized matrices
     143              : !> \param elpa_print logical that determines if information about the ELPA diagonalization should
     144              : !>                   be printed
     145              : !> \param elpa_one_stage logical that enables the one-stage solver
     146              : !> \param dlaf_neigvec_min_input ...
     147              : !> \param eps_check_diag_input ...
     148              : !> \param direct_generalized_diagonalization_input ...
     149              : !> \par History
     150              : !>      - Add support for DLA-Future (05.09.2023, RMeli)
     151              : !> \author  MI 11.2013
     152              : ! **************************************************************************************************
     153        10420 :    SUBROUTINE diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, &
     154              :                         elpa_print, elpa_one_stage, dlaf_neigvec_min_input, eps_check_diag_input, &
     155              :                         direct_generalized_diagonalization_input)
     156              :       CHARACTER(LEN=*), INTENT(IN)                       :: diag_lib
     157              :       LOGICAL, INTENT(OUT)                               :: fallback_applied
     158              :       INTEGER, INTENT(IN)                                :: elpa_kernel, elpa_neigvec_min_input
     159              :       LOGICAL, INTENT(IN)                                :: elpa_qr, elpa_print, elpa_one_stage
     160              :       INTEGER, INTENT(IN)                                :: dlaf_neigvec_min_input
     161              :       REAL(KIND=dp), INTENT(IN)                          :: eps_check_diag_input
     162              :       LOGICAL, INTENT(IN), OPTIONAL :: direct_generalized_diagonalization_input
     163              : 
     164              :       LOGICAL, SAVE                                      :: initialized = .FALSE.
     165              : 
     166        10420 :       fallback_applied = .FALSE.
     167              : 
     168        10420 :       IF (diag_lib == "ScaLAPACK") THEN
     169          198 :          diag_type = FM_DIAG_TYPE_SCALAPACK
     170        10222 :       ELSE IF (diag_lib == "ELPA") THEN
     171              : #if defined (__ELPA)
     172              :          ! ELPA is requested and available
     173        10222 :          diag_type = FM_DIAG_TYPE_ELPA
     174              : #else
     175              :          ! ELPA library requested but not linked, switch back to SL
     176              :          diag_type = FM_DIAG_TYPE_SCALAPACK
     177              :          fallback_applied = .TRUE.
     178              : #endif
     179            0 :       ELSE IF (diag_lib == "cuSOLVER") THEN
     180            0 :          diag_type = FM_DIAG_TYPE_CUSOLVER
     181            0 :       ELSE IF (diag_lib == "DLAF") THEN
     182              : #if defined (__DLAF)
     183              :          diag_type = FM_DIAG_TYPE_DLAF
     184              : #else
     185            0 :          CPABORT("ERROR in diag_init: CP2K was not compiled with DLA-Future support")
     186              : #endif
     187              :       ELSE
     188            0 :          CPABORT("ERROR in diag_init: Initialization of unknown diagonalization library requested")
     189              :       END IF
     190              : 
     191              :       ! Initialization of requested diagonalization library
     192        10420 :       IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_ELPA) THEN
     193         9625 :          CALL initialize_elpa_library(one_stage=elpa_one_stage, qr=elpa_qr, should_print=elpa_print)
     194         9625 :          CALL set_elpa_kernel(elpa_kernel)
     195         9625 :          initialized = .TRUE.
     196              :       END IF
     197              : #if defined(__DLAF)
     198              :       IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_DLAF) THEN
     199              :          CALL cp_dlaf_initialize()
     200              :          initialized = .TRUE.
     201              :       END IF
     202              :       dlaf_neigvec_min = dlaf_neigvec_min_input
     203              : #else
     204              :       MARK_USED(dlaf_neigvec_min_input)
     205              : #endif
     206              : 
     207        10420 :       elpa_neigvec_min = elpa_neigvec_min_input
     208        10420 :       eps_check_diag = eps_check_diag_input
     209        10420 :       IF (PRESENT(direct_generalized_diagonalization_input)) THEN
     210        10420 :          direct_generalized_diagonalization = direct_generalized_diagonalization_input
     211              :       ELSE
     212            0 :          direct_generalized_diagonalization = .FALSE.
     213              :       END IF
     214              : 
     215        10420 :    END SUBROUTINE diag_init
     216              : 
     217              : ! **************************************************************************************************
     218              : !> \brief Finalize the diagonalization library
     219              : ! **************************************************************************************************
     220        10209 :    SUBROUTINE diag_finalize()
     221              : #if defined (__ELPA)
     222        10209 :       IF (diag_type == FM_DIAG_TYPE_ELPA) &
     223        10011 :          CALL finalize_elpa_library()
     224              : #endif
     225              : #if defined (__DLAF)
     226              :       IF (diag_type == FM_DIAG_TYPE_DLAF) &
     227              :          CALL cp_dlaf_finalize()
     228              : #endif
     229        10209 :    END SUBROUTINE diag_finalize
     230              : 
     231              : ! **************************************************************************************************
     232              : !> \brief   Choose the Eigensolver depending on which library is available
     233              : !>          ELPA seems to be unstable for small systems
     234              : !> \param matrix ...
     235              : !> \param eigenvectors ...
     236              : !> \param eigenvalues ...
     237              : !> \param info ...
     238              : !> \par     info If present returns error code and prevents program stops.
     239              : !>               Works currently only for cp_fm_syevd with ScaLAPACK.
     240              : !>               Other solvers will end the program regardless of PRESENT(info).
     241              : !> \par History
     242              : !>      - Do not use ELPA for small matrices and use instead ScaLAPACK as fallback (10.05.2021, MK)
     243              : ! **************************************************************************************************
     244       201348 :    SUBROUTINE choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
     245              : 
     246              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     247              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
     248              :       INTEGER, INTENT(OUT), OPTIONAL                     :: info
     249              : 
     250              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'choose_eigv_solver'
     251              : 
     252              :       ! Sample peak memory
     253       201348 :       CALL m_memory()
     254              : 
     255       201348 :       IF (PRESENT(info)) info = 0 ! Default for solvers that do not return an info.
     256              : 
     257       201348 :       IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     258         4560 :          CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     259              : 
     260       196788 :       ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     261       196788 :          IF (matrix%matrix_struct%nrow_global < elpa_neigvec_min) THEN
     262              :             ! We don't trust ELPA with very small matrices.
     263       180316 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     264              :          ELSE
     265        16472 :             CALL cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
     266              :          END IF
     267              : 
     268            0 :       ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     269            0 :          IF (matrix%matrix_struct%nrow_global < cusolver_n_min) THEN
     270              :             ! We don't trust cuSolver with very small matrices.
     271            0 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     272              :          ELSE
     273            0 :             CALL cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
     274              :          END IF
     275              : 
     276              : #if defined(__DLAF)
     277              :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     278              :          IF (matrix%matrix_struct%nrow_global < dlaf_neigvec_min) THEN
     279              :             ! Use ScaLAPACK for small matrices
     280              :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     281              :          ELSE
     282              :             CALL cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
     283              :          END IF
     284              : #endif
     285              : 
     286              :       ELSE
     287            0 :          CPABORT("ERROR in "//routineN//": Invalid diagonalization type requested")
     288              :       END IF
     289              : 
     290       201348 :       CALL check_diag(matrix, eigenvectors, nvec=SIZE(eigenvalues))
     291              : 
     292       201348 :    END SUBROUTINE choose_eigv_solver
     293              : 
     294              : ! **************************************************************************************************
     295              : !> \brief Return whether diagonalization checks should be performed.
     296              : !> \return ...
     297              : ! **************************************************************************************************
     298       424308 :    FUNCTION diag_check_requested() RESULT(check_requested)
     299              :       LOGICAL                                            :: check_requested
     300              : 
     301              : #if defined(__CHECK_DIAG)
     302              :       check_requested = .TRUE.
     303              : #else
     304       424308 :       check_requested = eps_check_diag >= 0.0_dp
     305              : #endif
     306              : 
     307       424308 :    END FUNCTION diag_check_requested
     308              : 
     309              : ! **************************************************************************************************
     310              : !> \brief Return the warning threshold for diagonalization checks.
     311              : !> \return ...
     312              : ! **************************************************************************************************
     313       389608 :    FUNCTION diag_check_warning_threshold() RESULT(eps_warning)
     314              :       REAL(KIND=dp)                                      :: eps_warning
     315              : 
     316       389608 :       eps_warning = eps_check_diag_default
     317       389608 :       IF (eps_check_diag >= 0.0_dp) THEN
     318          354 :          eps_warning = eps_check_diag
     319              :       END IF
     320              : 
     321       389608 :    END FUNCTION diag_check_warning_threshold
     322              : 
     323              : ! **************************************************************************************************
     324              : !> \brief   Check result of diagonalization, i.e. the orthonormality of the eigenvectors
     325              : !> \param matrix Work matrix
     326              : !> \param eigenvectors Eigenvectors to be checked
     327              : !> \param nvec ...
     328              : ! **************************************************************************************************
     329       389590 :    SUBROUTINE check_diag(matrix, eigenvectors, nvec)
     330              : 
     331              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     332              :       INTEGER, INTENT(IN)                                :: nvec
     333              : 
     334              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'check_diag'
     335              : 
     336              :       CHARACTER(LEN=default_string_length)               :: diag_type_name
     337              :       REAL(KIND=dp)                                      :: eps, eps_abort, eps_warning, gold, test
     338              :       INTEGER                                            :: handle, i, j, ncol, nrow, output_unit
     339              :       LOGICAL                                            :: check_eigenvectors
     340              : #if defined(__parallel)
     341              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     342              :       INTEGER                                            :: il, jl, ipcol, iprow, &
     343              :                                                             mypcol, myprow, npcol, nprow
     344              :       INTEGER, DIMENSION(9)                              :: desca
     345              : #endif
     346              : 
     347       389590 :       CALL timeset(routineN, handle)
     348              : 
     349       389590 :       output_unit = default_output_unit
     350       389590 :       check_eigenvectors = diag_check_requested()
     351       389590 :       eps_warning = diag_check_warning_threshold()
     352       389590 :       eps_abort = 10.0_dp*eps_warning
     353              : 
     354       389590 :       gold = 0.0_dp
     355       389590 :       test = 0.0_dp
     356       389590 :       eps = 0.0_dp
     357              : 
     358       389590 :       IF (check_eigenvectors) THEN
     359              : #if defined(__parallel)
     360          336 :          nrow = eigenvectors%matrix_struct%nrow_global
     361          336 :          ncol = MIN(eigenvectors%matrix_struct%ncol_global, nvec)
     362          336 :          CALL cp_fm_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, eigenvectors, 0.0_dp, matrix)
     363          336 :          context => matrix%matrix_struct%context
     364          336 :          myprow = context%mepos(1)
     365          336 :          mypcol = context%mepos(2)
     366          336 :          nprow = context%num_pe(1)
     367          336 :          npcol = context%num_pe(2)
     368         3360 :          desca(:) = matrix%matrix_struct%descriptor(:)
     369         6044 :          outer: DO j = 1, ncol
     370       250176 :             DO i = 1, ncol
     371       244132 :                CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
     372       249840 :                IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     373       122066 :                   gold = MERGE(0.0_dp, 1.0_dp, i /= j)
     374       122066 :                   test = matrix%local_data(il, jl)
     375       122066 :                   eps = ABS(test - gold)
     376       122066 :                   IF (eps > eps_warning) EXIT outer
     377              :                END IF
     378              :             END DO
     379              :          END DO outer
     380              : #else
     381              :          nrow = SIZE(eigenvectors%local_data, 1)
     382              :          ncol = MIN(SIZE(eigenvectors%local_data, 2), nvec)
     383              :          CALL dgemm("T", "N", ncol, ncol, nrow, 1.0_dp, &
     384              :                     eigenvectors%local_data(1, 1), nrow, &
     385              :                     eigenvectors%local_data(1, 1), nrow, &
     386              :                     0.0_dp, matrix%local_data(1, 1), nrow)
     387              :          outer: DO j = 1, ncol
     388              :             DO i = 1, ncol
     389              :                gold = MERGE(0.0_dp, 1.0_dp, i /= j)
     390              :                test = matrix%local_data(i, j)
     391              :                eps = ABS(test - gold)
     392              :                IF (eps > eps_warning) EXIT outer
     393              :             END DO
     394              :          END DO outer
     395              : #endif
     396          336 :          IF (eps > eps_warning) THEN
     397            0 :             IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     398            0 :                diag_type_name = "SYEVD"
     399            0 :             ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     400            0 :                diag_type_name = "ELPA"
     401            0 :             ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     402            0 :                diag_type_name = "CUSOLVER"
     403            0 :             ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     404            0 :                diag_type_name = "DLAF"
     405              :             ELSE
     406            0 :                CPABORT("Unknown diag_type")
     407              :             END IF
     408              :             WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,F0.0,A,ES10.3)") &
     409            0 :                "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     410            0 :                "Matrix element (", i, ", ", j, ") = ", test, &
     411            0 :                "The deviation from the expected value ", gold, " is", eps
     412            0 :             IF (eps > eps_abort) THEN
     413            0 :                CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     414              :             ELSE
     415            0 :                CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     416              :             END IF
     417              :          END IF
     418              :       END IF
     419              : 
     420       389590 :       CALL timestop(handle)
     421              : 
     422       389590 :    END SUBROUTINE check_diag
     423              : 
     424              : ! **************************************************************************************************
     425              : !> \brief   Check C^T*S*C = I for a generalized eigenvalue problem.
     426              : !> \param overlap original overlap matrix S; used as work matrix and overwritten
     427              : !> \param eigenvectors eigenvectors C to be checked
     428              : !> \param scratch work matrix
     429              : !> \param nvec ...
     430              : ! **************************************************************************************************
     431            2 :    SUBROUTINE check_generalized_diag(overlap, eigenvectors, scratch, nvec)
     432              : 
     433              :       TYPE(cp_fm_type), INTENT(IN)                       :: eigenvectors
     434              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: overlap, scratch
     435              :       INTEGER, INTENT(IN)                                :: nvec
     436              : 
     437              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'check_generalized_diag'
     438              : 
     439              :       CHARACTER(LEN=default_string_length)               :: diag_type_name
     440              :       REAL(KIND=dp)                                      :: eps, eps_abort, eps_warning, gold, test
     441              :       INTEGER                                            :: handle, i, j, ncol, nrow, output_unit
     442              : #if defined(__parallel)
     443              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     444              :       INTEGER                                            :: il, jl, ipcol, iprow, &
     445              :                                                             mypcol, myprow, npcol, nprow
     446              :       INTEGER, DIMENSION(9)                              :: desca
     447              : #endif
     448              : 
     449            2 :       CALL timeset(routineN, handle)
     450              : 
     451            2 :       IF (.NOT. diag_check_requested()) THEN
     452            0 :          CALL timestop(handle)
     453            0 :          RETURN
     454              :       END IF
     455              : 
     456            2 :       output_unit = default_output_unit
     457            2 :       eps_warning = diag_check_warning_threshold()
     458            2 :       eps_abort = 10.0_dp*eps_warning
     459              : 
     460            2 :       nrow = eigenvectors%matrix_struct%nrow_global
     461            2 :       ncol = MIN(eigenvectors%matrix_struct%ncol_global, nvec)
     462              : 
     463            2 :       CALL parallel_gemm("N", "N", nrow, ncol, nrow, 1.0_dp, overlap, eigenvectors, 0.0_dp, scratch)
     464            2 :       CALL parallel_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, scratch, 0.0_dp, overlap)
     465              : 
     466            2 :       gold = 0.0_dp
     467            2 :       test = 0.0_dp
     468            2 :       eps = 0.0_dp
     469              : 
     470              : #if defined(__parallel)
     471            2 :       context => overlap%matrix_struct%context
     472            2 :       myprow = context%mepos(1)
     473            2 :       mypcol = context%mepos(2)
     474            2 :       nprow = context%num_pe(1)
     475            2 :       npcol = context%num_pe(2)
     476           20 :       desca(:) = overlap%matrix_struct%descriptor(:)
     477           18 :       outer: DO j = 1, ncol
     478          276 :          DO i = 1, ncol
     479          258 :             CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
     480          274 :             IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     481            2 :                gold = MERGE(0.0_dp, 1.0_dp, i /= j)
     482            2 :                test = overlap%local_data(il, jl)
     483            2 :                eps = ABS(test - gold)
     484            2 :                IF (eps > eps_warning) EXIT outer
     485              :             END IF
     486              :          END DO
     487              :       END DO outer
     488              : #else
     489              :       outer: DO j = 1, ncol
     490              :          DO i = 1, ncol
     491              :             gold = MERGE(0.0_dp, 1.0_dp, i /= j)
     492              :             test = overlap%local_data(i, j)
     493              :             eps = ABS(test - gold)
     494              :             IF (eps > eps_warning) EXIT outer
     495              :          END DO
     496              :       END DO outer
     497              : #endif
     498              : 
     499            2 :       IF (eps > eps_warning) THEN
     500            1 :          IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     501            1 :             diag_type_name = "SYGVX"
     502            0 :          ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     503            0 :             diag_type_name = "ELPA"
     504            0 :          ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     505            0 :             diag_type_name = "CUSOLVER"
     506            0 :          ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     507            0 :             diag_type_name = "DLAF"
     508              :          ELSE
     509            0 :             CPABORT("Unknown diag_type")
     510              :          END IF
     511              :          WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,F0.0,A,ES10.3)") &
     512            1 :             "The generalized eigenvectors returned by "//TRIM(diag_type_name)//" are not S-orthonormal", &
     513            1 :             "Matrix element (", i, ", ", j, ") = ", test, &
     514            2 :             "The deviation from the expected value ", gold, " is", eps
     515            1 :          IF (eps > eps_abort) THEN
     516            0 :             CPABORT("ERROR in "//routineN//": Check of generalized matrix diagonalization failed")
     517              :          ELSE
     518            1 :             CPWARN("Check of generalized matrix diagonalization failed in routine "//routineN)
     519              :          END IF
     520              :       END IF
     521              : 
     522            2 :       CALL timestop(handle)
     523              : 
     524              :    END SUBROUTINE check_generalized_diag
     525              : 
     526              : ! **************************************************************************************************
     527              : !> \brief Issues an error messages and exits (optionally only warns).
     528              : !> \param mesg message to be issued
     529              : !> \param info error code (optional)
     530              : !> \param warn only warn (optional)
     531              : ! **************************************************************************************************
     532            0 :    SUBROUTINE cp_fm_error(mesg, info, warn)
     533              :       CHARACTER(LEN=*), INTENT(IN)                       :: mesg
     534              :       INTEGER, INTENT(IN), OPTIONAL                      :: info
     535              :       LOGICAL, INTENT(IN), OPTIONAL                      :: warn
     536              : 
     537              :       CHARACTER(LEN=2*default_string_length)             :: message
     538              :       LOGICAL                                            :: warning
     539              : 
     540            0 :       IF (PRESENT(info)) THEN
     541            0 :          WRITE (message, "(A,A,I0,A)") mesg, " (INFO = ", info, ")"
     542              :       ELSE
     543            0 :          WRITE (message, "(A)") mesg
     544              :       END IF
     545              : 
     546            0 :       IF (PRESENT(warn)) THEN
     547            0 :          warning = warn
     548              :       ELSE ! abort
     549              :          warning = .FALSE.
     550              :       END IF
     551              : 
     552            0 :       IF (warning) THEN
     553            0 :          CPWARN(TRIM(message))
     554              :       ELSE
     555            0 :          CPABORT(TRIM(message))
     556              :       END IF
     557            0 :    END SUBROUTINE cp_fm_error
     558              : 
     559              : ! **************************************************************************************************
     560              : !> \brief   Computes all eigenvalues and vectors of a real symmetric matrix
     561              : !>          significantly faster than syevx, scales also much better.
     562              : !>          Needs workspace to allocate all the eigenvectors
     563              : !> \param matrix ...
     564              : !> \param eigenvectors ...
     565              : !> \param eigenvalues ...
     566              : !> \param info ...
     567              : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     568              : !> \par     info If present returns error code and prevents program stops.
     569              : !>               Works currently only for scalapack.
     570              : !>               Other solvers will end the program regardless of PRESENT(info).
     571              : ! **************************************************************************************************
     572       188202 :    SUBROUTINE cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     573              : 
     574              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     575              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     576              :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     577              : 
     578              :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd'
     579              : 
     580              :       INTEGER                                  :: handle, myinfo, n, nmo
     581              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eig
     582              : #if defined(__parallel)
     583              :       TYPE(cp_fm_type)                         :: eigenvectors_new, matrix_new
     584              : #else
     585              :       INTEGER                                  :: liwork, lwork
     586              :       INTEGER, DIMENSION(:), POINTER           :: iwork
     587              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m
     588              :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     589              :       INTEGER, TARGET                          :: v(1)
     590              :       REAL(KIND=dp), TARGET                    :: w(1)
     591              : #endif
     592              : 
     593       188202 :       CALL timeset(routineN, handle)
     594              : 
     595       188202 :       myinfo = 0
     596              : 
     597       188202 :       n = matrix%matrix_struct%nrow_global
     598       564606 :       ALLOCATE (eig(n))
     599              : 
     600              : #if defined(__parallel)
     601              :       ! Determine if the input matrix needs to be redistributed before diagonalization.
     602              :       ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
     603              :       ! The redistributed matrix is stored in matrix_new, which is just a pointer
     604              :       ! to the original matrix if no redistribution is required
     605       188202 :       CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new)
     606              : 
     607              :       ! Call scalapack on CPUs that hold the new matrix
     608       188202 :       IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
     609        96028 :          IF (PRESENT(info)) THEN
     610         2780 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig, myinfo)
     611              :          ELSE
     612        93248 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig)
     613              :          END IF
     614              :       END IF
     615              :       ! Redistribute results and clean up
     616       188202 :       CALL cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
     617              : #else
     618              :       ! Retrieve the optimal work array sizes first
     619              :       lwork = -1
     620              :       liwork = -1
     621              :       m => matrix%local_data
     622              :       iwork => v
     623              :       work => w
     624              : 
     625              :       CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     626              : 
     627              :       IF (myinfo /= 0) THEN
     628              :          CALL cp_fm_error("ERROR in DSYEVD: Work space query failed", myinfo, PRESENT(info))
     629              :       END IF
     630              : 
     631              :       ! Reallocate work arrays and perform diagonalisation
     632              :       lwork = NINT(work(1))
     633              :       ALLOCATE (work(lwork))
     634              : 
     635              :       liwork = iwork(1)
     636              :       ALLOCATE (iwork(liwork))
     637              : 
     638              :       CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     639              : 
     640              :       IF (myinfo /= 0) THEN
     641              :          CALL cp_fm_error("ERROR in DSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     642              :       END IF
     643              : 
     644              :       CALL cp_fm_to_fm(matrix, eigenvectors)
     645              : 
     646              :       DEALLOCATE (iwork)
     647              :       DEALLOCATE (work)
     648              : #endif
     649              : 
     650       188202 :       IF (PRESENT(info)) info = myinfo
     651              : 
     652       188202 :       nmo = SIZE(eigenvalues, 1)
     653       188202 :       IF (nmo > n) THEN
     654            0 :          eigenvalues(1:n) = eig(1:n)
     655              :       ELSE
     656      1738683 :          eigenvalues(1:nmo) = eig(1:nmo)
     657              :       END IF
     658              : 
     659       188202 :       DEALLOCATE (eig)
     660              : 
     661       188202 :       CALL check_diag(matrix, eigenvectors, n)
     662              : 
     663       188202 :       CALL timestop(handle)
     664              : 
     665       376404 :    END SUBROUTINE cp_fm_syevd
     666              : 
     667              : ! **************************************************************************************************
     668              : !> \brief ...
     669              : !> \param matrix ...
     670              : !> \param eigenvectors ...
     671              : !> \param eigenvalues ...
     672              : !> \param info ...
     673              : ! **************************************************************************************************
     674        96028 :    SUBROUTINE cp_fm_syevd_base(matrix, eigenvectors, eigenvalues, info)
     675              : 
     676              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     677              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     678              :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     679              : 
     680              :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd_base'
     681              : 
     682              :       INTEGER                                  :: handle, myinfo
     683              : #if defined(__parallel)
     684              :       TYPE(cp_blacs_env_type), POINTER         :: context
     685              :       INTEGER                                  :: liwork, lwork, n
     686              :       INTEGER, DIMENSION(9)                    :: descm, descv
     687        96028 :       INTEGER, DIMENSION(:), POINTER           :: iwork
     688        96028 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     689        96028 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m, v
     690              :       REAL(KIND=dp), TARGET                    :: w(1)
     691              : #if defined (__HAS_IEEE_EXCEPTIONS)
     692              :       LOGICAL, DIMENSION(5)                    :: halt
     693              : #endif
     694              : #endif
     695              : 
     696        96028 :       CALL timeset(routineN, handle)
     697              : 
     698        96028 :       myinfo = 0
     699              : 
     700              : #if defined(__parallel)
     701              : 
     702        96028 :       n = matrix%matrix_struct%nrow_global
     703        96028 :       m => matrix%local_data
     704        96028 :       context => matrix%matrix_struct%context
     705       960280 :       descm(:) = matrix%matrix_struct%descriptor(:)
     706              : 
     707        96028 :       v => eigenvectors%local_data
     708       960280 :       descv(:) = eigenvectors%matrix_struct%descriptor(:)
     709              : 
     710        96028 :       liwork = 7*n + 8*context%num_pe(2) + 2
     711       288084 :       ALLOCATE (iwork(liwork))
     712              : 
     713              :       ! Work space query
     714        96028 :       lwork = -1
     715        96028 :       work => w
     716              : 
     717              :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     718        96028 :                    work(1), lwork, iwork(1), liwork, myinfo)
     719              : 
     720        96028 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     721            0 :          CALL cp_fm_error("ERROR in PDSYEVD: Work space query failed", myinfo, PRESENT(info))
     722              :       END IF
     723              : 
     724        96028 :       lwork = NINT(work(1)) ! can be insufficient due to bug in reference ScaLAPACK
     725              : #if !defined(__SCALAPACK_NO_WA)
     726              :       ! Query workspace for QDORMTR as called by reference ScaLAPACK (PDSYEVD).
     727              :       CALL pdormtr('L', 'U', 'N', n, n, m(1, 1), 1, 1, descm, m(1, 1), &
     728        96028 :                    v(1, 1), 1, 1, descv, work(1), -1, myinfo)
     729              : 
     730        96028 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     731            0 :          CALL cp_fm_error("ERROR in PDORMTR: Work space query failed", myinfo, PRESENT(info))
     732              :       END IF
     733              : 
     734        96028 :       IF (lwork < (work(1) + 2*n)) THEN
     735        44015 :          lwork = NINT(work(1)) + 2*n ! still wrong by 2*N
     736              :       END IF
     737              : #endif
     738       288084 :       ALLOCATE (work(lwork))
     739              : 
     740              :       ! Initial/documented amount of liwork is exceeded (slightly worrisome too).
     741        96028 :       IF (liwork < iwork(1)) THEN
     742            0 :          liwork = iwork(1)
     743            0 :          DEALLOCATE (iwork)
     744            0 :          ALLOCATE (iwork(liwork))
     745              :       END IF
     746              : 
     747              :       ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
     748              :       ! Therefore, we disable floating point traps temporarily.
     749              : #if defined (__HAS_IEEE_EXCEPTIONS)
     750              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     751              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     752              : #endif
     753              : 
     754              :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     755        96028 :                    work(1), lwork, iwork(1), liwork, myinfo)
     756              : 
     757              : #if defined (__HAS_IEEE_EXCEPTIONS)
     758              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     759              : #endif
     760        96028 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     761            0 :          CALL cp_fm_error("ERROR in PDSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     762              :       END IF
     763              : 
     764        96028 :       IF (PRESENT(info)) info = myinfo
     765              : 
     766        96028 :       DEALLOCATE (work)
     767        96028 :       DEALLOCATE (iwork)
     768              : #else
     769              :       MARK_USED(matrix)
     770              :       MARK_USED(eigenvectors)
     771              :       MARK_USED(eigenvalues)
     772              :       myinfo = -1
     773              :       IF (PRESENT(info)) info = myinfo
     774              :       CALL cp_fm_error("ERROR in "//TRIM(routineN)// &
     775              :                        ": Matrix diagonalization using PDSYEVD requested without ScaLAPACK support")
     776              : #endif
     777              : 
     778        96028 :       CALL timestop(handle)
     779              : 
     780        96028 :    END SUBROUTINE cp_fm_syevd_base
     781              : 
     782              : ! **************************************************************************************************
     783              : !> \brief   compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack.
     784              : !>          If eigenvectors are required this routine will replicate a full matrix on each CPU...
     785              : !>          if more than a handful of vectors are needed, use cp_fm_syevd instead
     786              : !> \param matrix ...
     787              : !> \param eigenvectors ...
     788              : !> \param eigenvalues ...
     789              : !> \param neig ...
     790              : !> \param work_syevx ...
     791              : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     792              : !>          neig   is the number of vectors needed (default all)
     793              : !>          work_syevx evec calculation only, is the fraction of the working buffer allowed (1.0 use full buffer)
     794              : !>                     reducing this saves time, but might cause the routine to fail
     795              : ! **************************************************************************************************
     796           40 :    SUBROUTINE cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
     797              : 
     798              :       ! Diagonalise the symmetric n by n matrix using the LAPACK library.
     799              : 
     800              :       TYPE(cp_fm_type), INTENT(IN)                 :: matrix
     801              :       TYPE(cp_fm_type), OPTIONAL, INTENT(IN)       :: eigenvectors
     802              :       REAL(KIND=dp), OPTIONAL, INTENT(IN)          :: work_syevx
     803              :       INTEGER, INTENT(IN), OPTIONAL                :: neig
     804              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)     :: eigenvalues
     805              : 
     806              :       CHARACTER(LEN=*), PARAMETER                  :: routineN = "cp_fm_syevx"
     807              : 
     808              : #if defined(__parallel)
     809              :       REAL(KIND=dp), PARAMETER                     :: orfac = -1.0_dp
     810              : #endif
     811              :       REAL(KIND=dp), PARAMETER                     :: vl = 0.0_dp, &
     812              :                                                       vu = 0.0_dp
     813              : 
     814              :       TYPE(cp_blacs_env_type), POINTER             :: context
     815              :       TYPE(cp_logger_type), POINTER                :: logger
     816              :       CHARACTER(LEN=1)                             :: job_type
     817              :       REAL(KIND=dp)                                :: abstol, work_syevx_local
     818              :       INTEGER                                      :: handle, info, liwork, lwork, &
     819              :                                                       m, n, nb, npcol, nprow, &
     820              :                                                       output_unit, neig_local
     821              :       LOGICAL                                      :: ionode, needs_evecs
     822           40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: ifail, iwork
     823           40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: w, work
     824           40 :       REAL(KIND=dp), DIMENSION(:, :), POINTER      :: a, z
     825              : 
     826              :       REAL(KIND=dp), EXTERNAL                      :: dlamch
     827              : 
     828              : #if defined(__parallel)
     829              :       INTEGER                                      :: nn, np0, npe, nq0, nz
     830              :       INTEGER, DIMENSION(9)                        :: desca, descz
     831           40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: iclustr
     832           40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: gap
     833              :       INTEGER, EXTERNAL                            :: iceil, numroc
     834              : #else
     835              :       INTEGER                                      :: nla, nlz
     836              :       INTEGER, EXTERNAL                            :: ilaenv
     837              : #endif
     838              : #if defined (__HAS_IEEE_EXCEPTIONS)
     839              :       LOGICAL, DIMENSION(5)                        :: halt
     840              : #endif
     841              : 
     842              :       ! by default all
     843           40 :       n = matrix%matrix_struct%nrow_global
     844           40 :       neig_local = n
     845           40 :       IF (PRESENT(neig)) neig_local = neig
     846           40 :       IF (neig_local == 0) RETURN
     847              : 
     848           40 :       CALL timeset(routineN, handle)
     849              : 
     850           40 :       needs_evecs = PRESENT(eigenvectors)
     851              : 
     852           40 :       logger => cp_get_default_logger()
     853           40 :       ionode = logger%para_env%is_source()
     854           40 :       n = matrix%matrix_struct%nrow_global
     855              : 
     856              :       ! by default allocate all needed space
     857           40 :       work_syevx_local = 1.0_dp
     858           40 :       IF (PRESENT(work_syevx)) work_syevx_local = work_syevx
     859              : 
     860              :       ! set scalapack job type
     861           40 :       IF (needs_evecs) THEN
     862           40 :          job_type = "V"
     863              :       ELSE
     864            0 :          job_type = "N"
     865              :       END IF
     866              : 
     867              :       ! target the most accurate calculation of the eigenvalues
     868           40 :       abstol = 2.0_dp*dlamch("S")
     869              : 
     870           40 :       context => matrix%matrix_struct%context
     871           40 :       nprow = context%num_pe(1)
     872           40 :       npcol = context%num_pe(2)
     873              : 
     874          120 :       ALLOCATE (w(n))
     875          560 :       eigenvalues(:) = 0.0_dp
     876              : #if defined(__parallel)
     877              : 
     878           40 :       IF (matrix%matrix_struct%nrow_block /= matrix%matrix_struct%ncol_block) THEN
     879            0 :          CPABORT("ERROR in "//routineN//": Invalid blocksize (no square blocks) found")
     880              :       END IF
     881              : 
     882           40 :       a => matrix%local_data
     883          400 :       desca(:) = matrix%matrix_struct%descriptor(:)
     884              : 
     885           40 :       IF (needs_evecs) THEN
     886           40 :          z => eigenvectors%local_data
     887          400 :          descz(:) = eigenvectors%matrix_struct%descriptor(:)
     888              :       ELSE
     889              :          ! z will not be referenced
     890            0 :          z => matrix%local_data
     891            0 :          descz = desca
     892              :       END IF
     893              : 
     894              :       ! Get the optimal work storage size
     895              : 
     896           40 :       npe = nprow*npcol
     897           40 :       nb = matrix%matrix_struct%nrow_block
     898           40 :       nn = MAX(n, nb, 2)
     899           40 :       np0 = numroc(nn, nb, 0, 0, nprow)
     900           40 :       nq0 = MAX(numroc(nn, nb, 0, 0, npcol), nb)
     901              : 
     902           40 :       IF (needs_evecs) THEN
     903              :          lwork = 5*n + MAX(5*nn, np0*nq0) + iceil(neig_local, npe)*nn + 2*nb*nb + &
     904           40 :                  INT(work_syevx_local*REAL((neig_local - 1)*n, dp)) !!!! allocates a full matrix on every CPU !!!!!
     905              :       ELSE
     906            0 :          lwork = 5*n + MAX(5*nn, nb*(np0 + 1))
     907              :       END IF
     908           40 :       liwork = 6*MAX(N, npe + 1, 4)
     909              : 
     910          120 :       ALLOCATE (gap(npe))
     911          120 :       gap = 0.0_dp
     912          120 :       ALLOCATE (iclustr(2*npe))
     913          200 :       iclustr = 0
     914          120 :       ALLOCATE (ifail(n))
     915          560 :       ifail = 0
     916          120 :       ALLOCATE (iwork(liwork))
     917          120 :       ALLOCATE (work(lwork))
     918              : 
     919              :       ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
     920              :       ! Therefore, we disable floating point traps temporarily.
     921              : #if defined (__HAS_IEEE_EXCEPTIONS)
     922              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     923              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     924              : #endif
     925              :       CALL pdsyevx(job_type, "I", "U", n, a(1, 1), 1, 1, desca, vl, vu, 1, neig_local, abstol, m, nz, w(1), orfac, &
     926           40 :                    z(1, 1), 1, 1, descz, work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap, info)
     927              : #if defined (__HAS_IEEE_EXCEPTIONS)
     928              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     929              : #endif
     930              : 
     931              :       ! Error handling
     932           40 :       IF (info /= 0) THEN
     933            0 :          IF (ionode) THEN
     934            0 :             output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     935              :             WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     936            0 :                "info    = ", info, &
     937            0 :                "lwork   = ", lwork, &
     938            0 :                "liwork  = ", liwork, &
     939            0 :                "nz      = ", nz
     940            0 :             IF (info > 0) THEN
     941              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     942            0 :                   "ifail   = ", ifail
     943              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     944            0 :                   "iclustr = ", iclustr
     945              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,E10.3)))") &
     946            0 :                   "gap     = ", gap
     947              :             END IF
     948              :          END IF
     949            0 :          CPABORT("ERROR in PDSYEVX (ScaLAPACK)")
     950              :       END IF
     951              : 
     952              :       ! Release work storage
     953           40 :       DEALLOCATE (gap)
     954           40 :       DEALLOCATE (iclustr)
     955              : 
     956              : #else
     957              : 
     958              :       a => matrix%local_data
     959              :       IF (needs_evecs) THEN
     960              :          z => eigenvectors%local_data
     961              :       ELSE
     962              :          ! z will not be referenced
     963              :          z => matrix%local_data
     964              :       END IF
     965              : 
     966              :       ! Get the optimal work storage size
     967              : 
     968              :       nb = MAX(ilaenv(1, "DSYTRD", "U", n, -1, -1, -1), &
     969              :                ilaenv(1, "DORMTR", "U", n, -1, -1, -1))
     970              : 
     971              :       lwork = MAX((nb + 3)*n, 8*n) + n ! sun bug fix
     972              :       liwork = 5*n
     973              : 
     974              :       ALLOCATE (ifail(n))
     975              :       ifail = 0
     976              :       ALLOCATE (iwork(liwork))
     977              :       ALLOCATE (work(lwork))
     978              :       info = 0
     979              :       nla = SIZE(a, 1)
     980              :       nlz = SIZE(z, 1)
     981              : 
     982              :       ! LAPACK takes advantage of IEEE754 exceptions for speedup.
     983              :       ! Therefore, we disable floating point traps temporarily.
     984              : #if defined (__HAS_IEEE_EXCEPTIONS)
     985              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     986              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     987              : #endif
     988              :       CALL dsyevx(job_type, "I", "U", n, a(1, 1), nla, vl, vu, 1, neig_local, &
     989              :                   abstol, m, w, z(1, 1), nlz, work(1), lwork, iwork(1), ifail(1), info)
     990              : #if defined (__HAS_IEEE_EXCEPTIONS)
     991              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     992              : #endif
     993              : 
     994              :       ! Error handling
     995              :       IF (info /= 0) THEN
     996              :          output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     997              :          WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     998              :             "info    = ", info
     999              :          IF (info > 0) THEN
    1000              :             WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
    1001              :                "ifail   = ", ifail
    1002              :          END IF
    1003              :          CPABORT("Error in DSYEVX (ScaLAPACK)")
    1004              :       END IF
    1005              : 
    1006              : #endif
    1007              :       ! Release work storage
    1008           40 :       DEALLOCATE (ifail)
    1009           40 :       DEALLOCATE (iwork)
    1010           40 :       DEALLOCATE (work)
    1011          400 :       eigenvalues(1:neig_local) = w(1:neig_local)
    1012           40 :       DEALLOCATE (w)
    1013              : 
    1014           40 :       IF (needs_evecs) CALL check_diag(matrix, eigenvectors, neig_local)
    1015              : 
    1016           40 :       CALL timestop(handle)
    1017              : 
    1018          120 :    END SUBROUTINE cp_fm_syevx
    1019              : 
    1020              : ! **************************************************************************************************
    1021              : !> \brief decomposes a quadratic matrix into its singular value decomposition
    1022              : !> \param matrix_a ...
    1023              : !> \param matrix_eigvl ...
    1024              : !> \param matrix_eigvr_t ...
    1025              : !> \param eigval ...
    1026              : !> \param info ...
    1027              : !> \author Maximilian Graml
    1028              : ! **************************************************************************************************
    1029          100 :    SUBROUTINE cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
    1030              : 
    1031              :       TYPE(cp_fm_type), INTENT(IN)              :: matrix_a
    1032              :       TYPE(cp_fm_type), INTENT(INOUT)           :: matrix_eigvl, matrix_eigvr_t
    1033              :       REAL(KIND=dp), DIMENSION(:), POINTER, &
    1034              :          INTENT(INOUT)                          :: eigval
    1035              :       INTEGER, INTENT(OUT), OPTIONAL            :: info
    1036              : 
    1037              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_svd'
    1038              : 
    1039              :       INTEGER                                   :: handle, n, m, myinfo, lwork
    1040          100 :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a
    1041              :       TYPE(cp_fm_type)                          :: matrix_lu
    1042              :       REAL(KIND=dp), DIMENSION(:), POINTER      :: work
    1043              :       REAL(KIND=dp), TARGET                     :: w(1)
    1044              : #if defined(__parallel)
    1045              :       INTEGER, DIMENSION(9)                     :: desca, descu, descvt
    1046              : #endif
    1047              : 
    1048          100 :       CALL timeset(routineN, handle)
    1049              : 
    1050              :       CALL cp_fm_create(matrix=matrix_lu, &
    1051              :                         matrix_struct=matrix_a%matrix_struct, &
    1052          100 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
    1053          100 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
    1054          100 :       a => matrix_lu%local_data
    1055          100 :       m = matrix_lu%matrix_struct%nrow_global
    1056          100 :       n = matrix_lu%matrix_struct%ncol_global
    1057              :       ! Assert that incoming matrix is quadratic
    1058          100 :       CPASSERT(m == n)
    1059              : 
    1060              :       ! Prepare for workspace queries
    1061          100 :       myinfo = 0
    1062          100 :       lwork = -1
    1063          100 :       work => w
    1064              : #if defined(__parallel)
    1065              :       ! To do: That might need a redistribution check as in cp_fm_syevd
    1066         1000 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1067         1000 :       descu(:) = matrix_eigvl%matrix_struct%descriptor(:)
    1068         1000 :       descvt(:) = matrix_eigvr_t%matrix_struct%descriptor(:)
    1069              : 
    1070              :       ! Workspace query
    1071              :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
    1072          100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
    1073              : 
    1074          100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
    1075            0 :          CALL cp_fm_error("ERROR in PDGESVD: Work space query failed", myinfo, PRESENT(info))
    1076              :       END IF
    1077              : 
    1078          100 :       lwork = NINT(work(1))
    1079          300 :       ALLOCATE (work(lwork))
    1080              : 
    1081              :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
    1082          100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
    1083              : 
    1084          100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
    1085            0 :          CALL cp_fm_error("ERROR in PDGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
    1086              :       END IF
    1087              : #else
    1088              :       ! Workspace query
    1089              :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
    1090              :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
    1091              : 
    1092              :       IF (myinfo /= 0) THEN
    1093              :          CALL cp_fm_error("ERROR in DGESVD: Work space query failed", myinfo, PRESENT(info))
    1094              :       END IF
    1095              : 
    1096              :       lwork = NINT(work(1))
    1097              :       ALLOCATE (work(lwork))
    1098              : 
    1099              :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
    1100              :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
    1101              : 
    1102              :       IF (myinfo /= 0) THEN
    1103              :          CALL cp_fm_error("ERROR in DGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
    1104              :       END IF
    1105              : 
    1106              : #endif
    1107              :       ! Release intermediary matrices
    1108          100 :       DEALLOCATE (work)
    1109          100 :       CALL cp_fm_release(matrix_lu)
    1110              : 
    1111          100 :       IF (PRESENT(info)) info = myinfo
    1112              : 
    1113          100 :       CALL timestop(handle)
    1114          100 :    END SUBROUTINE cp_fm_svd
    1115              : 
    1116              : ! **************************************************************************************************
    1117              : !> \brief ...
    1118              : !> \param matrix ...
    1119              : !> \param work ...
    1120              : !> \param exponent ...
    1121              : !> \param threshold ...
    1122              : !> \param n_dependent ...
    1123              : !> \param verbose ...
    1124              : !> \param eigvals ...
    1125              : ! **************************************************************************************************
    1126         2198 :    SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
    1127              : 
    1128              :       ! Raise the real symmetric n by n matrix to the power given by
    1129              :       ! the exponent. All eigenvectors with a corresponding eigenvalue lower
    1130              :       ! than threshold are quenched. result in matrix
    1131              : 
    1132              :       ! - Creation (29.03.1999, Matthias Krack)
    1133              :       ! - Parallelised using BLACS and ScaLAPACK (06.06.2001,MK)
    1134              : 
    1135              :       TYPE(cp_fm_type), INTENT(IN)               :: matrix, work
    1136              :       REAL(KIND=dp), INTENT(IN)                  :: exponent, threshold
    1137              :       INTEGER, INTENT(OUT)                       :: n_dependent
    1138              :       LOGICAL, INTENT(IN), OPTIONAL              :: verbose
    1139              :       REAL(KIND=dp), DIMENSION(2), INTENT(OUT), &
    1140              :          OPTIONAL                                :: eigvals
    1141              : 
    1142              :       CHARACTER(LEN=*), PARAMETER                :: routineN = 'cp_fm_power'
    1143              : 
    1144              :       INTEGER                                    :: handle, icol_global, &
    1145              :                                                     mypcol, myprow, &
    1146              :                                                     ncol_global, nrow_global
    1147              :       LOGICAL                                    :: my_verbose
    1148              :       REAL(KIND=dp)                              :: condition_number, f, p
    1149              :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE   :: eigenvalues
    1150         2198 :       REAL(KIND=dp), DIMENSION(:, :), POINTER    :: eigenvectors
    1151              :       TYPE(cp_blacs_env_type), POINTER           :: context
    1152              : 
    1153              : #if defined(__parallel)
    1154              :       INTEGER           :: icol_local, ipcol, iprow, irow_global, irow_local
    1155              : #endif
    1156              : 
    1157         2198 :       CALL timeset(routineN, handle)
    1158              : 
    1159         2198 :       my_verbose = .FALSE.
    1160         2198 :       IF (PRESENT(verbose)) my_verbose = verbose
    1161              : 
    1162         2198 :       context => matrix%matrix_struct%context
    1163         2198 :       myprow = context%mepos(1)
    1164         2198 :       mypcol = context%mepos(2)
    1165         2198 :       n_dependent = 0
    1166         2198 :       p = 0.5_dp*exponent
    1167              : 
    1168         2198 :       nrow_global = matrix%matrix_struct%nrow_global
    1169         2198 :       ncol_global = matrix%matrix_struct%ncol_global
    1170              : 
    1171         6594 :       ALLOCATE (eigenvalues(ncol_global))
    1172        64505 :       eigenvalues(:) = 0.0_dp
    1173              : 
    1174              :       ! Compute the eigenvectors and eigenvalues
    1175              : 
    1176         2198 :       CALL choose_eigv_solver(matrix, work, eigenvalues)
    1177              : 
    1178         2198 :       IF (PRESENT(eigvals)) THEN
    1179          350 :          eigvals(1) = eigenvalues(1)
    1180          350 :          eigvals(2) = eigenvalues(ncol_global)
    1181              :       END IF
    1182              : 
    1183              : #if defined(__parallel)
    1184         2198 :       eigenvectors => work%local_data
    1185              : 
    1186              :       ! Build matrix**exponent with eigenvector quenching
    1187              : 
    1188        64505 :       DO icol_global = 1, ncol_global
    1189              : 
    1190        64505 :          IF (eigenvalues(icol_global) < threshold) THEN
    1191              : 
    1192           50 :             n_dependent = n_dependent + 1
    1193              : 
    1194           50 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1195              : 
    1196           50 :             IF (mypcol == ipcol) THEN
    1197           50 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1198         5850 :                DO irow_global = 1, nrow_global
    1199         5800 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1200         5850 :                   IF (myprow == iprow) THEN
    1201         2900 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1202         2900 :                      eigenvectors(irow_local, icol_local) = 0.0_dp
    1203              :                   END IF
    1204              :                END DO
    1205              :             END IF
    1206              : 
    1207              :          ELSE
    1208              : 
    1209        62257 :             f = eigenvalues(icol_global)**p
    1210              : 
    1211        62257 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1212              : 
    1213        62257 :             IF (mypcol == ipcol) THEN
    1214        62257 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1215      3933232 :                DO irow_global = 1, nrow_global
    1216      3870975 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1217      3933232 :                   IF (myprow == iprow) THEN
    1218      1979562 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1219              :                      eigenvectors(irow_local, icol_local) = &
    1220      1979562 :                         f*eigenvectors(irow_local, icol_local)
    1221              :                   END IF
    1222              :                END DO
    1223              :             END IF
    1224              : 
    1225              :          END IF
    1226              : 
    1227              :       END DO
    1228              : 
    1229              : #else
    1230              : 
    1231              :       eigenvectors => work%local_data
    1232              : 
    1233              :       ! Build matrix**exponent with eigenvector quenching
    1234              : 
    1235              :       DO icol_global = 1, ncol_global
    1236              : 
    1237              :          IF (eigenvalues(icol_global) < threshold) THEN
    1238              : 
    1239              :             n_dependent = n_dependent + 1
    1240              :             eigenvectors(1:nrow_global, icol_global) = 0.0_dp
    1241              : 
    1242              :          ELSE
    1243              : 
    1244              :             f = eigenvalues(icol_global)**p
    1245              :             eigenvectors(1:nrow_global, icol_global) = &
    1246              :                f*eigenvectors(1:nrow_global, icol_global)
    1247              : 
    1248              :          END IF
    1249              : 
    1250              :       END DO
    1251              : 
    1252              : #endif
    1253         2198 :       CALL cp_fm_syrk("U", "N", ncol_global, 1.0_dp, work, 1, 1, 0.0_dp, matrix)
    1254         2198 :       CALL cp_fm_uplo_to_full(matrix, work)
    1255              : 
    1256              :       ! Print some warnings/notes
    1257         2198 :       IF (matrix%matrix_struct%para_env%is_source() .AND. my_verbose) THEN
    1258            0 :          condition_number = ABS(eigenvalues(ncol_global)/eigenvalues(1))
    1259              :          WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,(T2,A,ES15.6))") &
    1260            0 :             "CP_FM_POWER: smallest eigenvalue:", eigenvalues(1), &
    1261            0 :             "CP_FM_POWER: largest eigenvalue: ", eigenvalues(ncol_global), &
    1262            0 :             "CP_FM_POWER: condition number:   ", condition_number
    1263            0 :          IF (eigenvalues(1) <= 0.0_dp) THEN
    1264              :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1265            0 :                "WARNING: matrix has a negative eigenvalue, tighten EPS_DEFAULT"
    1266              :          END IF
    1267            0 :          IF (condition_number > 1.0E12_dp) THEN
    1268              :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1269            0 :                "WARNING: high condition number => possibly ill-conditioned matrix"
    1270              :          END IF
    1271              :       END IF
    1272              : 
    1273         2198 :       DEALLOCATE (eigenvalues)
    1274              : 
    1275         2198 :       CALL timestop(handle)
    1276              : 
    1277         2198 :    END SUBROUTINE cp_fm_power
    1278              : 
    1279              : ! **************************************************************************************************
    1280              : !> \brief ...
    1281              : !> \param matrix ...
    1282              : !> \param eigenvectors ...
    1283              : !> \param eigval ...
    1284              : !> \param thresh ...
    1285              : !> \param start_sec_block ...
    1286              : ! **************************************************************************************************
    1287           18 :    SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, start_sec_block)
    1288              : 
    1289              :       ! Calculates block diagonalization of a full symmetric matrix
    1290              :       ! It has its origin in cp_fm_syevx. This routine rotates only elements
    1291              :       ! which are larger than a threshold values "thresh".
    1292              :       ! start_sec_block is the start of the second block.
    1293              :       ! IT DOES ONLY ONE SWEEP!
    1294              : 
    1295              :       ! - Creation (07.10.2002, Martin Fengler)
    1296              :       ! - Cosmetics (05.04.06, MK)
    1297              : 
    1298              :       TYPE(cp_fm_type), INTENT(IN)              :: eigenvectors, matrix
    1299              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)   :: eigval
    1300              :       INTEGER, INTENT(IN)                       :: start_sec_block
    1301              :       REAL(KIND=dp), INTENT(IN)                 :: thresh
    1302              : 
    1303              :       CHARACTER(len=*), PARAMETER               :: routineN = 'cp_fm_block_jacobi'
    1304              : 
    1305              :       INTEGER :: handle
    1306              :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a, ev
    1307              : 
    1308              :       REAL(KIND=dp) :: tan_theta, tau, c, s
    1309              :       INTEGER  :: q, p, N
    1310           18 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE  :: c_ip
    1311              : 
    1312              : #if defined(__parallel)
    1313              :       TYPE(cp_blacs_env_type), POINTER :: context
    1314              : 
    1315              :       INTEGER :: nprow, npcol, block_dim_row, block_dim_col, info, &
    1316              :                  ev_row_block_size, iam, mynumrows, mype, npe, q_loc
    1317           18 :       REAL(KIND=dp), DIMENSION(:, :), ALLOCATABLE  :: a_loc, ev_loc
    1318              :       INTEGER, DIMENSION(9)                        :: desca, descz, &
    1319              :                                                       desc_a_block, &
    1320              :                                                       desc_ev_loc
    1321              :       TYPE(mp_comm_type):: allgrp
    1322              :       TYPE(cp_blacs_type) :: ictxt_loc
    1323              :       INTEGER, EXTERNAL :: numroc
    1324              : #endif
    1325              : 
    1326              :       ! -------------------------------------------------------------------------
    1327              : 
    1328           18 :       CALL timeset(routineN, handle)
    1329              : 
    1330              : #if defined(__parallel)
    1331           18 :       context => matrix%matrix_struct%context
    1332           18 :       allgrp = matrix%matrix_struct%para_env
    1333              : 
    1334           18 :       nprow = context%num_pe(1)
    1335           18 :       npcol = context%num_pe(2)
    1336              : 
    1337           18 :       N = matrix%matrix_struct%nrow_global
    1338              : 
    1339           18 :       A => matrix%local_data
    1340          180 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1341           18 :       EV => eigenvectors%local_data
    1342          180 :       descz(:) = eigenvectors%matrix_struct%descriptor(:)
    1343              : 
    1344              :       ! Copy the block to be rotated to the master process firstly and broadcast to all processes
    1345              :       ! start_sec_block defines where the second block starts!
    1346              :       ! Block will be processed together with the OO block
    1347              : 
    1348           18 :       block_dim_row = start_sec_block - 1
    1349           18 :       block_dim_col = N - block_dim_row
    1350           72 :       ALLOCATE (A_loc(block_dim_row, block_dim_col))
    1351              : 
    1352           18 :       mype = matrix%matrix_struct%para_env%mepos
    1353           18 :       npe = matrix%matrix_struct%para_env%num_pe
    1354              :       ! Get a new context
    1355           18 :       CALL ictxt_loc%gridinit(matrix%matrix_struct%para_env, 'Row-major', nprow*npcol, 1)
    1356              : 
    1357              :       CALL descinit(desc_a_block, block_dim_row, block_dim_col, block_dim_row, &
    1358           18 :                     block_dim_col, 0, 0, ictxt_loc%get_handle(), block_dim_row, info)
    1359              : 
    1360              :       CALL pdgemr2d(block_dim_row, block_dim_col, A, 1, start_sec_block, desca, &
    1361           18 :                     A_loc, 1, 1, desc_a_block, context%get_handle())
    1362              :       ! Only the master (root) process received data yet
    1363           18 :       CALL allgrp%bcast(A_loc, 0)
    1364              : 
    1365              :       ! Since each process owns now the upper block, the eigenvectors can be re-sorted in such a way that
    1366              :       ! each process has a NN*1 grid, i.e. the process owns a bunch of rows which can be modified locally
    1367              : 
    1368              :       ! Initialize distribution of the eigenvectors
    1369           18 :       iam = mype
    1370           18 :       ev_row_block_size = n/(nprow*npcol)
    1371           18 :       mynumrows = NUMROC(N, ev_row_block_size, iam, 0, nprow*npcol)
    1372              : 
    1373          108 :       ALLOCATE (EV_loc(mynumrows, N), c_ip(mynumrows))
    1374              : 
    1375              :       CALL descinit(desc_ev_loc, N, N, ev_row_block_size, N, 0, 0, ictxt_loc%get_handle(), &
    1376           18 :                     mynumrows, info)
    1377              : 
    1378           18 :       CALL pdgemr2d(N, N, EV, 1, 1, descz, EV_loc, 1, 1, desc_ev_loc, context%get_handle())
    1379              : 
    1380              :       ! Start block diagonalization of matrix
    1381              : 
    1382           18 :       q_loc = 0
    1383              : 
    1384         1170 :       DO q = start_sec_block, N
    1385         1152 :          q_loc = q_loc + 1
    1386       148626 :          DO p = 1, (start_sec_block - 1)
    1387              : 
    1388       148608 :             IF (ABS(A_loc(p, q_loc)) > thresh) THEN
    1389              : 
    1390       117566 :                tau = (eigval(q) - eigval(p))/(2.0_dp*A_loc(p, q_loc))
    1391              : 
    1392       117566 :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1393              : 
    1394              :                ! Cos(theta)
    1395       117566 :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1396       117566 :                s = tan_theta*c
    1397              : 
    1398              :                ! Calculate eigenvectors: Q*J
    1399              :                ! c_ip = c*EV_loc(:,p) - s*EV_loc(:,q)
    1400              :                ! c_iq = s*EV_loc(:,p) + c*EV_loc(:,q)
    1401              :                ! EV(:,p) = c_ip
    1402              :                ! EV(:,q) = c_iq
    1403       117566 :                CALL dcopy(mynumrows, EV_loc(1, p), 1, c_ip(1), 1)
    1404       117566 :                CALL dscal(mynumrows, c, EV_loc(1, p), 1)
    1405       117566 :                CALL daxpy(mynumrows, -s, EV_loc(1, q), 1, EV_loc(1, p), 1)
    1406       117566 :                CALL dscal(mynumrows, c, EV_loc(1, q), 1)
    1407       117566 :                CALL daxpy(mynumrows, s, c_ip(1), 1, EV_loc(1, q), 1)
    1408              : 
    1409              :             END IF
    1410              : 
    1411              :          END DO
    1412              :       END DO
    1413              : 
    1414              :       ! Copy eigenvectors back to the original distribution
    1415           18 :       CALL pdgemr2d(N, N, EV_loc, 1, 1, desc_ev_loc, EV, 1, 1, descz, context%get_handle())
    1416              : 
    1417              :       ! Release work storage
    1418           18 :       DEALLOCATE (A_loc, EV_loc, c_ip)
    1419              : 
    1420           18 :       CALL ictxt_loc%gridexit()
    1421              : 
    1422              : #else
    1423              : 
    1424              :       N = matrix%matrix_struct%nrow_global
    1425              : 
    1426              :       ALLOCATE (c_ip(N)) ! Local eigenvalue vector
    1427              : 
    1428              :       A => matrix%local_data ! Contains the Matrix to be worked on
    1429              :       EV => eigenvectors%local_data ! Contains the eigenvectors up to blocksize, rest is garbage
    1430              : 
    1431              :       ! Start matrix diagonalization
    1432              : 
    1433              :       tan_theta = 0.0_dp
    1434              :       tau = 0.0_dp
    1435              : 
    1436              :       DO q = start_sec_block, N
    1437              :          DO p = 1, (start_sec_block - 1)
    1438              : 
    1439              :             IF (ABS(A(p, q)) > thresh) THEN
    1440              : 
    1441              :                tau = (eigval(q) - eigval(p))/(2.0_dp*A(p, q))
    1442              : 
    1443              :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1444              : 
    1445              :                ! Cos(theta)
    1446              :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1447              :                s = tan_theta*c
    1448              : 
    1449              :                ! Calculate eigenvectors: Q*J
    1450              :                ! c_ip = c*EV(:,p) - s*EV(:,q)
    1451              :                ! c_iq = s*EV(:,p) + c*EV(:,q)
    1452              :                ! EV(:,p) = c_ip
    1453              :                ! EV(:,q) = c_iq
    1454              :                CALL dcopy(N, EV(1, p), 1, c_ip(1), 1)
    1455              :                CALL dscal(N, c, EV(1, p), 1)
    1456              :                CALL daxpy(N, -s, EV(1, q), 1, EV(1, p), 1)
    1457              :                CALL dscal(N, c, EV(1, q), 1)
    1458              :                CALL daxpy(N, s, c_ip(1), 1, EV(1, q), 1)
    1459              : 
    1460              :             END IF
    1461              : 
    1462              :          END DO
    1463              :       END DO
    1464              : 
    1465              :       ! Release work storage
    1466              : 
    1467              :       DEALLOCATE (c_ip)
    1468              : 
    1469              : #endif
    1470              : 
    1471           18 :       CALL timestop(handle)
    1472              : 
    1473           90 :    END SUBROUTINE cp_fm_block_jacobi
    1474              : 
    1475              : ! **************************************************************************************************
    1476              : !> \brief General Eigenvalue Problem AX = BXE.
    1477              : !>        Use cuSOLVERMp directly when requested and large enough; otherwise
    1478              : !>        reduce the problem through a Cholesky decomposition of B.
    1479              : !> \param amatrix ...
    1480              : !> \param bmatrix ...
    1481              : !> \param eigenvectors ...
    1482              : !> \param eigenvalues ...
    1483              : !> \param work ...
    1484              : ! **************************************************************************************************
    1485          818 :    SUBROUTINE cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
    1486              : 
    1487              :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1488              :       REAL(KIND=dp), DIMENSION(:)                        :: eigenvalues
    1489              :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1490              : 
    1491              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig'
    1492              : 
    1493              :       INTEGER                                            :: handle, nao, nmo
    1494              :       LOGICAL                                            :: check_eigenvectors
    1495              :       TYPE(cp_fm_type)                                   :: overlap_check, scratch_check
    1496              : 
    1497          818 :       CALL timeset(routineN, handle)
    1498              : 
    1499          818 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1500          818 :       nmo = SIZE(eigenvalues)
    1501          818 :       check_eigenvectors = diag_check_requested()
    1502              : 
    1503          818 :       IF (diag_type == FM_DIAG_TYPE_CUSOLVER .AND. direct_generalized_diagonalization .AND. &
    1504              :           nao >= cusolver_n_min) THEN
    1505              :          ! Use cuSolverMP generalized eigenvalue solver without a CP2K-side
    1506              :          ! Cholesky reduction.
    1507              :          ! Use work as intermediate buffer since eigenvectors may be smaller (nao x nmo)
    1508            0 :          IF (check_eigenvectors) THEN
    1509            0 :             CALL cp_fm_create(overlap_check, bmatrix%matrix_struct)
    1510            0 :             CALL cp_fm_create(scratch_check, bmatrix%matrix_struct)
    1511            0 :             CALL cp_fm_to_fm(bmatrix, overlap_check)
    1512              :          END IF
    1513            0 :          CALL cp_fm_general_cusolver(amatrix, bmatrix, work, eigenvalues)
    1514            0 :          IF (check_eigenvectors) THEN
    1515            0 :             CALL check_generalized_diag(overlap_check, work, scratch_check, nmo)
    1516            0 :             CALL cp_fm_release(scratch_check)
    1517            0 :             CALL cp_fm_release(overlap_check)
    1518              :          END IF
    1519            0 :          CALL cp_fm_to_fm(work, eigenvectors, nmo)
    1520              : #if defined(__parallel)
    1521          818 :       ELSE IF (diag_type == FM_DIAG_TYPE_SCALAPACK .AND. direct_generalized_diagonalization) THEN
    1522              :          ! Use ScaLAPACK generalized eigenvalue solver without a CP2K-side
    1523              :          ! Cholesky reduction.
    1524            2 :          IF (check_eigenvectors) THEN
    1525            2 :             CALL cp_fm_create(overlap_check, bmatrix%matrix_struct)
    1526            2 :             CALL cp_fm_create(scratch_check, bmatrix%matrix_struct)
    1527            2 :             CALL cp_fm_to_fm(bmatrix, overlap_check)
    1528              :          END IF
    1529            2 :          CALL cp_fm_geeig_scalapack(amatrix, bmatrix, work, eigenvalues)
    1530            2 :          IF (check_eigenvectors) THEN
    1531            2 :             CALL check_generalized_diag(overlap_check, work, scratch_check, nmo)
    1532            2 :             CALL cp_fm_release(scratch_check)
    1533            2 :             CALL cp_fm_release(overlap_check)
    1534              :          END IF
    1535            2 :          CALL cp_fm_to_fm(work, eigenvectors, nmo)
    1536              : #endif
    1537              : #if defined(__DLAF)
    1538              :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF .AND. direct_generalized_diagonalization .AND. &
    1539              :                nao >= dlaf_neigvec_min) THEN
    1540              :          ! Use DLA-Future generalized eigenvalue solver for large matrices
    1541              :          IF (check_eigenvectors) THEN
    1542              :             CALL cp_fm_create(overlap_check, bmatrix%matrix_struct)
    1543              :             CALL cp_fm_create(scratch_check, bmatrix%matrix_struct)
    1544              :             CALL cp_fm_to_fm(bmatrix, overlap_check)
    1545              :          END IF
    1546              :          CALL cp_fm_diag_gen_dlaf(amatrix, bmatrix, work, eigenvalues)
    1547              :          IF (check_eigenvectors) THEN
    1548              :             CALL check_generalized_diag(overlap_check, work, scratch_check, nmo)
    1549              :             CALL cp_fm_release(scratch_check)
    1550              :             CALL cp_fm_release(overlap_check)
    1551              :          END IF
    1552              :          CALL cp_fm_to_fm(work, eigenvectors, nmo)
    1553              : #endif
    1554              :       ELSE
    1555              :          ! Cholesky decompose S=U(T)U
    1556          816 :          CALL cp_fm_cholesky_decompose(bmatrix)
    1557              :          ! Invert to get U^(-1)
    1558          816 :          CALL cp_fm_triangular_invert(bmatrix)
    1559              :          ! Reduce to get U^(-T) * H * U^(-1)
    1560          816 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, side="R")
    1561          816 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, transpose_tr=.TRUE.)
    1562              :          ! Diagonalize
    1563              :          CALL choose_eigv_solver(matrix=amatrix, eigenvectors=work, &
    1564          816 :                                  eigenvalues=eigenvalues)
    1565              :          ! Restore vectors C = U^(-1) * C*
    1566          816 :          CALL cp_fm_triangular_multiply(bmatrix, work)
    1567          816 :          CALL cp_fm_to_fm(work, eigenvectors, nmo)
    1568              :       END IF
    1569              : 
    1570          818 :       CALL timestop(handle)
    1571              : 
    1572          818 :    END SUBROUTINE cp_fm_geeig
    1573              : 
    1574              : ! **************************************************************************************************
    1575              : !> \brief General Eigenvalue Problem AX = BXE using ScaLAPACK PDSYGVX.
    1576              : !> \param amatrix ...
    1577              : !> \param bmatrix ...
    1578              : !> \param eigenvectors ...
    1579              : !> \param eigenvalues ...
    1580              : ! **************************************************************************************************
    1581            2 :    SUBROUTINE cp_fm_geeig_scalapack(amatrix, bmatrix, eigenvectors, eigenvalues)
    1582              : 
    1583              :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1584              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
    1585              : 
    1586              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig_scalapack'
    1587              : 
    1588              : #if defined(__parallel)
    1589              :       REAL(KIND=dp), PARAMETER                           :: orfac = -1.0_dp, &
    1590              :                                                             vl = 0.0_dp, &
    1591              :                                                             vu = 0.0_dp
    1592              : 
    1593              :       INTEGER                                            :: handle, info, liwork, lwork, m, n, nb, &
    1594              :                                                             neig, npcol, nprow, nz
    1595              :       INTEGER, DIMENSION(9)                              :: desca, descb, descz
    1596            2 :       INTEGER, DIMENSION(:), ALLOCATABLE                 :: iclustr, ifail, iwork
    1597              :       REAL(KIND=dp)                                      :: abstol
    1598            2 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE           :: gap, w, work
    1599            2 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b, z
    1600              : 
    1601              :       INTEGER                                            :: mq0, nn, np0, npe
    1602              :       INTEGER, EXTERNAL                                  :: iceil, numroc
    1603              :       REAL(KIND=dp), EXTERNAL                            :: dlamch
    1604              : #if defined (__HAS_IEEE_EXCEPTIONS)
    1605              :       LOGICAL, DIMENSION(5)                              :: halt
    1606              : #endif
    1607              : #else
    1608              :       INTEGER                                            :: handle
    1609              : #endif
    1610              : 
    1611            2 :       CALL timeset(routineN, handle)
    1612              : 
    1613              : #if defined(__parallel)
    1614            2 :       n = amatrix%matrix_struct%nrow_global
    1615            2 :       neig = MIN(SIZE(eigenvalues), n)
    1616              : 
    1617            2 :       IF (neig == 0) THEN
    1618            0 :          CALL timestop(handle)
    1619            0 :          RETURN
    1620              :       END IF
    1621              : 
    1622            2 :       IF (amatrix%matrix_struct%nrow_block /= amatrix%matrix_struct%ncol_block) THEN
    1623            0 :          CPABORT("ERROR in "//routineN//": Invalid blocksize (no square blocks) found")
    1624              :       END IF
    1625              : 
    1626            2 :       a => amatrix%local_data
    1627            2 :       b => bmatrix%local_data
    1628            2 :       z => eigenvectors%local_data
    1629           20 :       desca(:) = amatrix%matrix_struct%descriptor(:)
    1630           20 :       descb(:) = bmatrix%matrix_struct%descriptor(:)
    1631           20 :       descz(:) = eigenvectors%matrix_struct%descriptor(:)
    1632              : 
    1633            2 :       nprow = amatrix%matrix_struct%context%num_pe(1)
    1634            2 :       npcol = amatrix%matrix_struct%context%num_pe(2)
    1635            2 :       npe = nprow*npcol
    1636            2 :       nb = amatrix%matrix_struct%nrow_block
    1637            2 :       nn = MAX(n, nb, 2)
    1638            2 :       np0 = numroc(nn, nb, 0, 0, nprow)
    1639            2 :       mq0 = MAX(numroc(nn, nb, 0, 0, npcol), nb)
    1640              : 
    1641              :       lwork = 5*n + MAX(5*nn, np0*mq0 + 2*nb*nb) + iceil(neig, npe)*nn + &
    1642            2 :               MAX(0, neig - 1)*n
    1643            2 :       liwork = 6*MAX(n, npe + 1, 4)
    1644              : 
    1645            6 :       ALLOCATE (gap(npe))
    1646            6 :       gap = 0.0_dp
    1647            6 :       ALLOCATE (iclustr(2*npe))
    1648           10 :       iclustr = 0
    1649            6 :       ALLOCATE (ifail(n))
    1650          354 :       ifail = 0
    1651            6 :       ALLOCATE (iwork(liwork))
    1652            6 :       ALLOCATE (w(n))
    1653            6 :       ALLOCATE (work(lwork))
    1654              : 
    1655            2 :       abstol = 2.0_dp*dlamch("S")
    1656              : 
    1657              : #if defined (__HAS_IEEE_EXCEPTIONS)
    1658              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
    1659              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
    1660              : #endif
    1661              :       CALL pdsygvx(1, "V", "I", "U", n, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, &
    1662              :                    vl, vu, 1, neig, abstol, m, nz, w(1), orfac, z(1, 1), 1, 1, descz, &
    1663            2 :                    work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap(1), info)
    1664              : #if defined (__HAS_IEEE_EXCEPTIONS)
    1665              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
    1666              : #endif
    1667              : 
    1668            2 :       IF (info /= 0 .OR. m < neig .OR. nz < neig) THEN
    1669            0 :          CPABORT("ERROR in PDSYGVX (ScaLAPACK), info="//TRIM(cp_to_string(info)))
    1670              :       END IF
    1671              : 
    1672           34 :       eigenvalues(:) = 0.0_dp
    1673           34 :       eigenvalues(1:neig) = w(1:neig)
    1674              : 
    1675            2 :       DEALLOCATE (gap, iclustr, ifail, iwork, w, work)
    1676              : #else
    1677              :       MARK_USED(amatrix)
    1678              :       MARK_USED(bmatrix)
    1679              :       MARK_USED(eigenvectors)
    1680              :       MARK_USED(eigenvalues)
    1681              :       CPABORT("ERROR in "//routineN//": PDSYGVX requested without ScaLAPACK support")
    1682              : #endif
    1683              : 
    1684            2 :       CALL timestop(handle)
    1685              : 
    1686            2 :    END SUBROUTINE cp_fm_geeig_scalapack
    1687              : 
    1688              : ! **************************************************************************************************
    1689              : !> \brief General Eigenvalue Problem  AX = BXE
    1690              : !>        Use canonical diagonalization : U*s**(-1/2)
    1691              : !> \param amatrix ...
    1692              : !> \param bmatrix ...
    1693              : !> \param eigenvectors ...
    1694              : !> \param eigenvalues ...
    1695              : !> \param work ...
    1696              : !> \param epseig ...
    1697              : ! **************************************************************************************************
    1698           68 :    SUBROUTINE cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
    1699              : 
    1700              :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1701              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
    1702              :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1703              :       REAL(KIND=dp), INTENT(IN)                          :: epseig
    1704              : 
    1705              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig_canon'
    1706              : 
    1707              :       INTEGER                                            :: handle, i, icol, irow, nao, nc, ncol, &
    1708              :                                                             nmo, nx
    1709              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: evals
    1710              : 
    1711           68 :       CALL timeset(routineN, handle)
    1712              : 
    1713              :       ! Test sizees
    1714           68 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1715           68 :       nmo = SIZE(eigenvalues)
    1716          204 :       ALLOCATE (evals(nao))
    1717              : 
    1718              :       ! Diagonalize -S matrix, this way the NULL space is at the end of the spectrum
    1719           68 :       CALL cp_fm_scale(-1.0_dp, bmatrix)
    1720           68 :       CALL choose_eigv_solver(matrix=bmatrix, eigenvectors=work, eigenvalues=evals)
    1721         4726 :       evals(:) = -evals(:)
    1722           68 :       nc = nao
    1723         4434 :       DO i = 1, nao
    1724         4434 :          IF (evals(i) < epseig) THEN
    1725           40 :             nc = i - 1
    1726           40 :             EXIT
    1727              :          END IF
    1728              :       END DO
    1729           68 :       CPASSERT(nc /= 0)
    1730              : 
    1731           68 :       IF (nc /= nao) THEN
    1732           40 :          IF (nc < nmo) THEN
    1733              :             ! Copy NULL space definition to last vectors of eigenvectors (if needed)
    1734            0 :             ncol = nmo - nc
    1735            0 :             CALL cp_fm_to_fm(work, eigenvectors, ncol, nc + 1, nc + 1)
    1736              :          END IF
    1737              :          ! Set NULL space in eigenvector matrix of S to zero
    1738          332 :          DO icol = nc + 1, nao
    1739        36172 :             DO irow = 1, nao
    1740        36132 :                CALL cp_fm_set_element(work, irow, icol, 0.0_dp)
    1741              :             END DO
    1742              :          END DO
    1743              :          ! Set small eigenvalues to a dummy save value
    1744          332 :          evals(nc + 1:nao) = 1.0_dp
    1745              :       END IF
    1746              :       ! Calculate U*s**(-1/2)
    1747         4726 :       evals(:) = 1.0_dp/SQRT(evals(:))
    1748           68 :       CALL cp_fm_column_scale(work, evals)
    1749              :       ! Reduce to get U^(-T) * H * U^(-1)
    1750           68 :       CALL cp_fm_gemm("T", "N", nao, nao, nao, 1.0_dp, work, amatrix, 0.0_dp, bmatrix)
    1751           68 :       CALL cp_fm_gemm("N", "N", nao, nao, nao, 1.0_dp, bmatrix, work, 0.0_dp, amatrix)
    1752           68 :       IF (nc /= nao) THEN
    1753              :          ! set diagonal values to save large value
    1754          332 :          DO icol = nc + 1, nao
    1755          332 :             CALL cp_fm_set_element(amatrix, icol, icol, 10000.0_dp)
    1756              :          END DO
    1757              :       END IF
    1758              :       ! Diagonalize
    1759           68 :       CALL choose_eigv_solver(matrix=amatrix, eigenvectors=bmatrix, eigenvalues=eigenvalues)
    1760           68 :       nx = MIN(nc, nmo)
    1761              :       ! Restore vectors C = U^(-1) * C*
    1762           68 :       CALL cp_fm_gemm("N", "N", nao, nx, nc, 1.0_dp, work, bmatrix, 0.0_dp, eigenvectors)
    1763              : 
    1764           68 :       DEALLOCATE (evals)
    1765              : 
    1766           68 :       CALL timestop(handle)
    1767              : 
    1768           68 :    END SUBROUTINE cp_fm_geeig_canon
    1769              : 
    1770              : END MODULE cp_fm_diag
        

Generated by: LCOV version 2.0-1