LCOV - code coverage report
Current view: top level - src/fm - cp_fm_elpa.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:51fc4cd) Lines: 83.9 % 149 125
Test Date: 2026-02-04 06:28:27 Functions: 100.0 % 5 5

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Wrapper for ELPA
      10              : !> \author Ole Schuett
      11              : ! **************************************************************************************************
      12              : MODULE cp_fm_elpa
      13              :    USE cp_log_handling, ONLY: cp_to_string
      14              :    USE machine, ONLY: m_cpuid, &
      15              :                       MACHINE_X86, &
      16              :                       MACHINE_CPU_GENERIC, &
      17              :                       MACHINE_X86_SSE4, &
      18              :                       MACHINE_X86_AVX, &
      19              :                       MACHINE_X86_AVX2
      20              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      21              :    USE cp_fm_basic_linalg, ONLY: cp_fm_uplo_to_full
      22              :    USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_start, &
      23              :                                cp_fm_redistribute_end, &
      24              :                                cp_fm_redistribute_info
      25              :    USE cp_fm_struct, ONLY: cp_fm_struct_get
      26              :    USE cp_fm_types, ONLY: cp_fm_type, &
      27              :                           cp_fm_to_fm, &
      28              :                           cp_fm_release, &
      29              :                           cp_fm_create, &
      30              :                           cp_fm_write_info
      31              :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      32              :                               cp_logger_get_default_io_unit, &
      33              :                               cp_logger_type
      34              :    USE kinds, ONLY: default_string_length, dp
      35              :    USE message_passing, ONLY: mp_comm_type
      36              :    USE OMP_LIB, ONLY: omp_get_max_threads
      37              : #if defined(__HAS_IEEE_EXCEPTIONS)
      38              :    USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
      39              :                               ieee_set_halting_mode, &
      40              :                               IEEE_ALL
      41              : #endif
      42              : #include "../base/base_uses.f90"
      43              : 
      44              : #if defined(__ELPA)
      45              :    USE elpa_constants, ONLY: ELPA_SOLVER_1STAGE, ELPA_SOLVER_2STAGE, ELPA_OK, &
      46              :                              ELPA_2STAGE_REAL_INVALID, &
      47              :                              ELPA_2STAGE_REAL_DEFAULT, &
      48              :                              ELPA_2STAGE_REAL_GENERIC, &
      49              :                              ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
      50              :                              ELPA_2STAGE_REAL_BGP, &
      51              :                              ELPA_2STAGE_REAL_BGQ, &
      52              :                              ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
      53              :                              ELPA_2STAGE_REAL_SSE_BLOCK2, &
      54              :                              ELPA_2STAGE_REAL_SSE_BLOCK4, &
      55              :                              ELPA_2STAGE_REAL_SSE_BLOCK6, &
      56              :                              ELPA_2STAGE_REAL_AVX_BLOCK2, &
      57              :                              ELPA_2STAGE_REAL_AVX_BLOCK4, &
      58              :                              ELPA_2STAGE_REAL_AVX_BLOCK6, &
      59              :                              ELPA_2STAGE_REAL_AVX2_BLOCK2, &
      60              :                              ELPA_2STAGE_REAL_AVX2_BLOCK4, &
      61              :                              ELPA_2STAGE_REAL_AVX2_BLOCK6, &
      62              :                              ELPA_2STAGE_REAL_AVX512_BLOCK2, &
      63              :                              ELPA_2STAGE_REAL_AVX512_BLOCK4, &
      64              :                              ELPA_2STAGE_REAL_AVX512_BLOCK6, &
      65              :                              ELPA_2STAGE_REAL_NVIDIA_GPU, &
      66              :                              ELPA_2STAGE_REAL_AMD_GPU, &
      67              :                              ELPA_2STAGE_REAL_INTEL_GPU_SYCL
      68              : 
      69              :    USE elpa, ONLY: elpa_t, elpa_init, elpa_uninit, &
      70              :                    elpa_allocate, elpa_deallocate
      71              : #endif
      72              : 
      73              :    IMPLICIT NONE
      74              : 
      75              :    PRIVATE
      76              : 
      77              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
      78              : 
      79              : #if defined(__ELPA)
      80              :    INTEGER, DIMENSION(21), PARAMETER :: elpa_kernel_ids = [ &
      81              :                                         ELPA_2STAGE_REAL_INVALID, & ! auto
      82              :                                         ELPA_2STAGE_REAL_GENERIC, &
      83              :                                         ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
      84              :                                         ELPA_2STAGE_REAL_BGP, &
      85              :                                         ELPA_2STAGE_REAL_BGQ, &
      86              :                                         ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
      87              :                                         ELPA_2STAGE_REAL_SSE_BLOCK2, &
      88              :                                         ELPA_2STAGE_REAL_SSE_BLOCK4, &
      89              :                                         ELPA_2STAGE_REAL_SSE_BLOCK6, &
      90              :                                         ELPA_2STAGE_REAL_AVX_BLOCK2, &
      91              :                                         ELPA_2STAGE_REAL_AVX_BLOCK4, &
      92              :                                         ELPA_2STAGE_REAL_AVX_BLOCK6, &
      93              :                                         ELPA_2STAGE_REAL_AVX2_BLOCK2, &
      94              :                                         ELPA_2STAGE_REAL_AVX2_BLOCK4, &
      95              :                                         ELPA_2STAGE_REAL_AVX2_BLOCK6, &
      96              :                                         ELPA_2STAGE_REAL_AVX512_BLOCK2, &
      97              :                                         ELPA_2STAGE_REAL_AVX512_BLOCK4, &
      98              :                                         ELPA_2STAGE_REAL_AVX512_BLOCK6, &
      99              :                                         ELPA_2STAGE_REAL_NVIDIA_GPU, &
     100              :                                         ELPA_2STAGE_REAL_AMD_GPU, &
     101              :                                         ELPA_2STAGE_REAL_INTEL_GPU_SYCL]
     102              : 
     103              :    CHARACTER(len=14), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
     104              :       elpa_kernel_names = [CHARACTER(len=14) :: &
     105              :                            "AUTO", &
     106              :                            "GENERIC", &
     107              :                            "GENERIC_SIMPLE", &
     108              :                            "BGP", &
     109              :                            "BGQ", &
     110              :                            "SSE", &
     111              :                            "SSE_BLOCK2", &
     112              :                            "SSE_BLOCK4", &
     113              :                            "SSE_BLOCK6", &
     114              :                            "AVX_BLOCK2", &
     115              :                            "AVX_BLOCK4", &
     116              :                            "AVX_BLOCK6", &
     117              :                            "AVX2_BLOCK2", &
     118              :                            "AVX2_BLOCK4", &
     119              :                            "AVX2_BLOCK6", &
     120              :                            "AVX512_BLOCK2", &
     121              :                            "AVX512_BLOCK4", &
     122              :                            "AVX512_BLOCK6", &
     123              :                            "NVIDIA_GPU", &
     124              :                            "AMD_GPU", &
     125              :                            "INTEL_GPU"]
     126              : 
     127              :    CHARACTER(len=44), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
     128              :       elpa_kernel_descriptions = [CHARACTER(len=44) :: &
     129              :                                   "Automatically selected kernel", &
     130              :                                   "Generic kernel", &
     131              :                                   "Simplified generic kernel", &
     132              :                                   "Kernel optimized for IBM BGP", &
     133              :                                   "Kernel optimized for IBM BGQ", &
     134              :                                   "Kernel optimized for x86_64/SSE", &
     135              :                                   "Kernel optimized for x86_64/SSE (block=2)", &
     136              :                                   "Kernel optimized for x86_64/SSE (block=4)", &
     137              :                                   "Kernel optimized for x86_64/SSE (block=6)", &
     138              :                                   "Kernel optimized for Intel AVX (block=2)", &
     139              :                                   "Kernel optimized for Intel AVX (block=4)", &
     140              :                                   "Kernel optimized for Intel AVX (block=6)", &
     141              :                                   "Kernel optimized for Intel AVX2 (block=2)", &
     142              :                                   "Kernel optimized for Intel AVX2 (block=4)", &
     143              :                                   "Kernel optimized for Intel AVX2 (block=6)", &
     144              :                                   "Kernel optimized for Intel AVX-512 (block=2)", &
     145              :                                   "Kernel optimized for Intel AVX-512 (block=4)", &
     146              :                                   "Kernel optimized for Intel AVX-512 (block=6)", &
     147              :                                   "Kernel targeting Nvidia GPUs", &
     148              :                                   "Kernel targeting AMD GPUs", &
     149              :                                   "Kernel targeting Intel GPUs"]
     150              : #else
     151              :    INTEGER, DIMENSION(1), PARAMETER :: elpa_kernel_ids = [-1]
     152              :    CHARACTER(len=14), DIMENSION(1), PARAMETER :: elpa_kernel_names = ["AUTO"]
     153              :    CHARACTER(len=44), DIMENSION(1), PARAMETER :: elpa_kernel_descriptions = ["Automatically selected kernel"]
     154              : #endif
     155              : 
     156              : #if defined(__ELPA)
     157              :    INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto
     158              : #endif
     159              : 
     160              :    ! elpa_qr_unsafe: disable block size limitations
     161              :    LOGICAL, SAVE :: elpa_qr_unsafe = .TRUE., &
     162              :                     elpa_print = .FALSE., &
     163              :                     elpa_qr = .FALSE.
     164              : 
     165              : #if defined(__OFFLOAD_OPENCL)
     166              :    LOGICAL, SAVE :: elpa_one_stage = .TRUE.
     167              : #else
     168              :    LOGICAL, SAVE :: elpa_one_stage = .FALSE.
     169              : #endif
     170              : 
     171              :    PUBLIC :: cp_fm_diag_elpa, &
     172              :              set_elpa_kernel, &
     173              :              elpa_one_stage, &
     174              :              elpa_print, &
     175              :              elpa_qr, &
     176              :              elpa_kernel_ids, &
     177              :              elpa_kernel_names, &
     178              :              elpa_kernel_descriptions, &
     179              :              initialize_elpa_library, &
     180              :              finalize_elpa_library
     181              : 
     182              : CONTAINS
     183              : 
     184              : ! **************************************************************************************************
     185              : !> \brief Initialize the ELPA library
     186              : !> \param one_stage ...
     187              : !> \param qr ...
     188              : !> \param should_print flag that determines if additional information
     189              : !>        is printed when the diagonalization routine is called.
     190              : ! **************************************************************************************************
     191         9158 :    SUBROUTINE initialize_elpa_library(one_stage, qr, should_print)
     192              :       LOGICAL, INTENT(IN), OPTIONAL                      :: one_stage, qr, should_print
     193              : 
     194              : #if defined(__ELPA)
     195         9158 :       IF (elpa_init(20180525) /= ELPA_OK) &
     196            0 :          CPABORT("The linked ELPA library does not support the required API version")
     197         9158 :       IF (PRESENT(one_stage)) elpa_one_stage = one_stage
     198         9158 :       IF (PRESENT(should_print)) elpa_print = should_print
     199         9158 :       IF (PRESENT(qr)) elpa_qr = qr
     200              : #else
     201              :       MARK_USED(one_stage)
     202              :       MARK_USED(qr)
     203              :       MARK_USED(should_print)
     204              :       CPABORT("Initialization of ELPA library requested but not enabled during build")
     205              : #endif
     206         9158 :    END SUBROUTINE initialize_elpa_library
     207              : 
     208              : ! **************************************************************************************************
     209              : !> \brief Finalize the ELPA library
     210              : ! **************************************************************************************************
     211         9545 :    SUBROUTINE finalize_elpa_library()
     212              : #if defined(__ELPA)
     213         9545 :       CALL elpa_uninit()
     214              : #else
     215              :       CPABORT("Finalization of ELPA library requested but not enabled during build")
     216              : #endif
     217         9545 :    END SUBROUTINE finalize_elpa_library
     218              : 
     219              : ! **************************************************************************************************
     220              : !> \brief Sets the active ELPA kernel.
     221              : !> \param requested_kernel one of the elpa_kernel_ids
     222              : ! **************************************************************************************************
     223         9158 :    SUBROUTINE set_elpa_kernel(requested_kernel)
     224              :       INTEGER, INTENT(IN)                                :: requested_kernel
     225              : 
     226              : #if defined(__ELPA)
     227              :       INTEGER                                            :: cpuid
     228              : 
     229         9158 :       elpa_kernel = requested_kernel
     230              : 
     231              :       ! Resolve AUTO kernel.
     232         9158 :       IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
     233         9154 :          cpuid = m_cpuid()
     234         9154 :          IF ((MACHINE_CPU_GENERIC < cpuid) .AND. (cpuid <= MACHINE_X86)) THEN
     235            0 :             SELECT CASE (cpuid)
     236              :             CASE (MACHINE_X86_SSE4)
     237            0 :                elpa_kernel = ELPA_2STAGE_REAL_SSE_BLOCK4
     238              :             CASE (MACHINE_X86_AVX)
     239            0 :                elpa_kernel = ELPA_2STAGE_REAL_AVX_BLOCK4
     240              :             CASE (MACHINE_X86_AVX2)
     241         9154 :                elpa_kernel = ELPA_2STAGE_REAL_AVX2_BLOCK4
     242              :             CASE DEFAULT
     243         9154 :                elpa_kernel = ELPA_2STAGE_REAL_AVX512_BLOCK4
     244              :             END SELECT
     245              :          END IF
     246              : 
     247              :          ! Prefer GPU kernel if available.
     248              : #if !defined(__NO_OFFLOAD_ELPA)
     249              : #if defined(__OFFLOAD_CUDA)
     250              :          elpa_kernel = ELPA_2STAGE_REAL_NVIDIA_GPU
     251              : #endif
     252              : #if defined(__OFFLOAD_HIP)
     253              :          elpa_kernel = ELPA_2STAGE_REAL_AMD_GPU
     254              : #endif
     255              : #if defined(__OFFLOAD_OPENCL)
     256              :          elpa_kernel = ELPA_2STAGE_REAL_INTEL_GPU_SYCL
     257              : #endif
     258              : #endif
     259              :          ! If we could not find a suitable kernel then use ELPA_2STAGE_REAL_DEFAULT.
     260         9154 :          IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
     261            0 :             elpa_kernel = ELPA_2STAGE_REAL_DEFAULT
     262              :          END IF
     263              :       END IF
     264              : #else
     265              :       MARK_USED(requested_kernel)
     266              : #endif
     267         9158 :    END SUBROUTINE set_elpa_kernel
     268              : 
     269              : ! **************************************************************************************************
     270              : !> \brief Driver routine to diagonalize a FM matrix with the ELPA library.
     271              : !> \param matrix the matrix that is diagonalized
     272              : !> \param eigenvectors eigenvectors of the input matrix
     273              : !> \param eigenvalues eigenvalues of the input matrix
     274              : ! **************************************************************************************************
     275        11144 :    SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
     276              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix, eigenvectors
     277              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     278              : 
     279              : #if defined(__ELPA)
     280              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa'
     281              : 
     282              :       INTEGER                                  :: handle
     283              :       TYPE(cp_fm_type)                         :: eigenvectors_new, matrix_new
     284              :       TYPE(cp_fm_redistribute_info)            :: rdinfo
     285              : 
     286        11144 :       CALL timeset(routineN, handle)
     287              : 
     288              :       ! Determine if the input matrix needs to be redistributed before diagonalization.
     289              :       ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
     290              :       ! The redistributed matrix is stored in matrix_new, which is just a pointer
     291              :       ! to the original matrix if no redistribution is required.
     292              :       ! With ELPA, we have to make sure that all processor columns have nonzero width
     293              :       CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
     294        11144 :                                     caller_is_elpa=.TRUE., redist_info=rdinfo)
     295              : 
     296              :       ! Call ELPA on CPUs that hold the new matrix
     297        11144 :       IF (ASSOCIATED(matrix_new%matrix_struct)) &
     298         8106 :          CALL cp_fm_diag_elpa_base(matrix_new, eigenvectors_new, eigenvalues, rdinfo)
     299              : 
     300              :       ! Redistribute results and clean up
     301        11144 :       CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_new, eigenvectors_new)
     302              : 
     303        11144 :       CALL timestop(handle)
     304              : #else
     305              :       eigenvalues = 0
     306              :       MARK_USED(matrix)
     307              :       MARK_USED(eigenvectors)
     308              : 
     309              :       CPABORT("CP2K compiled without the ELPA library.")
     310              : #endif
     311        11144 :    END SUBROUTINE cp_fm_diag_elpa
     312              : 
     313              : #if defined(__ELPA)
     314              : ! **************************************************************************************************
     315              : !> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
     316              : !> \param matrix the matrix that is diagonalized
     317              : !> \param eigenvectors eigenvectors of the input matrix
     318              : !> \param eigenvalues eigenvalues of the input matrix
     319              : !> \param rdinfo ...
     320              : ! **************************************************************************************************
     321         8106 :    SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)
     322              : 
     323              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     324              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
     325              :       TYPE(cp_fm_redistribute_info), INTENT(IN)          :: rdinfo
     326              : 
     327              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa_base'
     328              : 
     329              :       INTEGER                                            :: handle
     330              : 
     331              :       CLASS(elpa_t), POINTER                   :: elpa_obj
     332              :       CHARACTER(len=default_string_length)     :: kernel_name
     333              :       TYPE(mp_comm_type) :: group
     334              :       INTEGER                                  :: i, &
     335              :                                                   mypcol, myprow, n, &
     336              :                                                   n_rows, n_cols, &
     337              :                                                   nblk, neig, io_unit, &
     338              :                                                   success
     339              :       LOGICAL                                  :: use_qr, check_eigenvalues
     340         8106 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eval, eval_noqr
     341              :       TYPE(cp_blacs_env_type), POINTER         :: context
     342              :       TYPE(cp_fm_type)                         :: matrix_noqr, eigenvectors_noqr
     343              :       TYPE(cp_logger_type), POINTER            :: logger
     344              :       REAL(KIND=dp), PARAMETER                 :: th = 1.0E-14_dp
     345         8106 :       INTEGER, DIMENSION(:), POINTER           :: ncol_locals
     346              : #if defined(__HAS_IEEE_EXCEPTIONS)
     347              :       LOGICAL, DIMENSION(5)                    :: halt
     348              : #endif
     349              : 
     350         8106 :       CALL timeset(routineN, handle)
     351         8106 :       NULLIFY (logger)
     352         8106 :       NULLIFY (ncol_locals)
     353              : 
     354         8106 :       check_eigenvalues = .FALSE.
     355              : 
     356         8106 :       logger => cp_get_default_logger()
     357         8106 :       io_unit = cp_logger_get_default_io_unit(logger)
     358              : 
     359         8106 :       n = matrix%matrix_struct%nrow_global
     360         8106 :       context => matrix%matrix_struct%context
     361         8106 :       group = matrix%matrix_struct%para_env
     362              : 
     363         8106 :       myprow = context%mepos(1)
     364         8106 :       mypcol = context%mepos(2)
     365              : 
     366              :       ! elpa needs the full matrix
     367         8106 :       CALL cp_fm_uplo_to_full(matrix, eigenvectors)
     368              : 
     369              :       CALL cp_fm_struct_get(matrix%matrix_struct, &
     370              :                             local_leading_dimension=n_rows, &
     371              :                             ncol_local=n_cols, &
     372              :                             nrow_block=nblk, &
     373         8106 :                             ncol_locals=ncol_locals)
     374              : 
     375              :       ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
     376        16212 :       IF (io_unit > 0 .AND. ANY(ncol_locals == 0)) THEN
     377            0 :          CALL rdinfo%write(io_unit)
     378            0 :          CALL cp_fm_write_info(matrix, io_unit)
     379            0 :          CPABORT("ELPA [pre-fail]: Problem contains processor column with zero width.")
     380              :       END IF
     381              : 
     382         8106 :       neig = SIZE(eigenvalues, 1)
     383              :       ! Decide if matrix is suitable for ELPA to use QR
     384              :       ! The definition of what is considered a suitable matrix depends on the ELPA version
     385              :       ! The relevant ELPA files to check are
     386              :       !     - Proper matrix order:  src/elpa2/elpa2_template.F90
     387              :       !     - Proper block size:    test/Fortran/test.F90
     388              :       ! Note that the names of these files might change in different ELPA versions
     389              :       ! Matrix order must be even
     390         8106 :       use_qr = elpa_qr .AND. (MODULO(n, 2) == 0)
     391              :       ! Matrix order and block size must be greater than or equal to 64
     392         8106 :       IF (.NOT. elpa_qr_unsafe) &
     393            0 :          use_qr = use_qr .AND. (n >= 64) .AND. (nblk >= 64)
     394              : 
     395              :       ! Check if eigenvalues computed with elpa_qr_unsafe should be verified
     396         8106 :       IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_print) &
     397            8 :          check_eigenvalues = .TRUE.
     398              : 
     399         8106 :       CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)
     400              : 
     401         8106 :       IF (check_eigenvalues) THEN
     402              :          ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
     403           24 :          ALLOCATE (eval_noqr(n))
     404            8 :          CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
     405            8 :          CALL cp_fm_to_fm(matrix, matrix_noqr)
     406            8 :          CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
     407           16 :          CALL cp_fm_uplo_to_full(matrix_noqr, eigenvectors_noqr)
     408              :       END IF
     409              : 
     410         8106 :       IF (io_unit > 0 .AND. elpa_print) THEN
     411              :          WRITE (UNIT=io_unit, FMT="(/,T2,A)") &
     412            6 :             "ELPA| Matrix diagonalization information"
     413              : 
     414              :          ! Find name for given kernel id.
     415              :          ! In case of ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list.
     416            6 :          kernel_name = "id: "//TRIM(ADJUSTL(cp_to_string(elpa_kernel)))
     417          132 :          DO i = 1, SIZE(elpa_kernel_ids)
     418          132 :             IF (elpa_kernel_ids(i) == elpa_kernel) THEN
     419            6 :                kernel_name = elpa_kernel_names(i)
     420              :             END IF
     421              :          END DO
     422              : 
     423              :          WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
     424            6 :             "ELPA| Matrix order (NA) ", n, &
     425            6 :             "ELPA| Matrix block size (NBLK) ", nblk, &
     426            6 :             "ELPA| Number of eigenvectors (NEV) ", neig, &
     427            6 :             "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
     428           12 :             "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
     429              :          WRITE (UNIT=io_unit, FMT="(T2,A,T61,A20)") &
     430            6 :             "ELPA| Kernel ", ADJUSTR(TRIM(kernel_name))
     431            6 :          IF (elpa_qr) THEN
     432              :             WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     433            4 :                "ELPA| QR step requested ", "YES"
     434              :          ELSE
     435              :             WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     436            2 :                "ELPA| QR step requested ", "NO"
     437              :          END IF
     438              : 
     439            6 :          IF (elpa_qr) THEN
     440            4 :             IF (use_qr) THEN
     441              :                WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     442            4 :                   "ELPA| Matrix is suitable for QR ", "YES"
     443              :             ELSE
     444              :                WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     445            0 :                   "ELPA| Matrix is suitable for QR ", "NO"
     446              :             END IF
     447            4 :             IF (.NOT. use_qr) THEN
     448            0 :                IF (MODULO(n, 2) /= 0) THEN
     449              :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     450            0 :                      "ELPA| Matrix order is NOT even"
     451              :                END IF
     452            0 :                IF ((nblk < 64) .AND. (.NOT. elpa_qr_unsafe)) THEN
     453              :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     454            0 :                      "ELPA| Matrix block size is NOT 64 or greater"
     455              :                END IF
     456              :             ELSE
     457            4 :                IF ((nblk < 64) .AND. elpa_qr_unsafe) THEN
     458              :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     459            2 :                      "ELPA| Matrix block size check was bypassed"
     460              :                END IF
     461              :             END IF
     462              :          END IF
     463              :       END IF
     464              : 
     465              :       ! the full eigenvalues vector is needed
     466        24318 :       ALLOCATE (eval(n))
     467              : 
     468         8106 :       elpa_obj => elpa_allocate()
     469              : 
     470         8106 :       CALL elpa_obj%set("na", n, success)
     471         8106 :       CPASSERT(success == ELPA_OK)
     472              : 
     473         8106 :       CALL elpa_obj%set("nev", neig, success)
     474         8106 :       CPASSERT(success == ELPA_OK)
     475              : 
     476         8106 :       CALL elpa_obj%set("local_nrows", n_rows, success)
     477         8106 :       CPASSERT(success == ELPA_OK)
     478              : 
     479         8106 :       CALL elpa_obj%set("local_ncols", n_cols, success)
     480         8106 :       CPASSERT(success == ELPA_OK)
     481              : 
     482         8106 :       CALL elpa_obj%set("nblk", nblk, success)
     483         8106 :       CPASSERT(success == ELPA_OK)
     484              : 
     485         8106 :       CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
     486         8106 :       CPASSERT(success == ELPA_OK)
     487              : 
     488         8106 :       CALL elpa_obj%set("process_row", myprow, success)
     489         8106 :       CPASSERT(success == ELPA_OK)
     490              : 
     491         8106 :       CALL elpa_obj%set("process_col", mypcol, success)
     492         8106 :       CPASSERT(success == ELPA_OK)
     493              : 
     494         8106 :       success = elpa_obj%setup()
     495         8106 :       CPASSERT(success == ELPA_OK)
     496              : 
     497              :       CALL elpa_obj%set("solver", &
     498              :                         MERGE(ELPA_SOLVER_1STAGE, ELPA_SOLVER_2STAGE, elpa_one_stage), &
     499        16208 :                         success)
     500         8106 :       IF (success /= ELPA_OK) &
     501            0 :          CPABORT("Setting solver for ELPA failed")
     502              : 
     503              :       ! enabling the GPU must happen before setting the kernel
     504            0 :       SELECT CASE (elpa_kernel)
     505              :       CASE (ELPA_2STAGE_REAL_NVIDIA_GPU)
     506            0 :          CALL elpa_obj%set("nvidia-gpu", 1, success)
     507            0 :          CPASSERT(success == ELPA_OK)
     508              :       CASE (ELPA_2STAGE_REAL_AMD_GPU)
     509            0 :          CALL elpa_obj%set("amd-gpu", 1, success)
     510            0 :          CPASSERT(success == ELPA_OK)
     511              :       CASE (ELPA_2STAGE_REAL_INTEL_GPU_SYCL)
     512            0 :          CALL elpa_obj%set("intel-gpu", 1, success)
     513         8106 :          CPASSERT(success == ELPA_OK)
     514              :       END SELECT
     515              : 
     516         8106 :       IF (.NOT. elpa_one_stage) THEN
     517         8102 :          CALL elpa_obj%set("real_kernel", elpa_kernel, success)
     518         8102 :          CPWARN_IF(success /= ELPA_OK, "Setting real_kernel for ELPA failed")
     519              : 
     520         8102 :          IF (use_qr) THEN
     521            8 :             CALL elpa_obj%set("qr", 1, success)
     522            8 :             CPASSERT(success == ELPA_OK)
     523              :          END IF
     524              :       END IF
     525              : 
     526              :       ! Set number of threads only when ELPA was built with OpenMP support.
     527         8106 :       IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == ELPA_OK) THEN
     528         8106 :          CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
     529         8106 :          CPASSERT(success == ELPA_OK)
     530              :       END IF
     531              : 
     532              :       ! ELPA solver: calculate the Eigenvalues/vectors
     533              : #if defined(__HAS_IEEE_EXCEPTIONS)
     534              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     535              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     536              : #endif
     537         8106 :       CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
     538              : #if defined(__HAS_IEEE_EXCEPTIONS)
     539              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     540              : #endif
     541              : 
     542         8106 :       IF (success /= ELPA_OK) &
     543            0 :          CPABORT("ELPA failed to diagonalize a matrix")
     544              : 
     545         8106 :       IF (check_eigenvalues) THEN
     546              :          ! run again without QR
     547            8 :          CALL elpa_obj%set("qr", 0, success)
     548            8 :          CPASSERT(success == ELPA_OK)
     549              : 
     550            8 :          CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
     551            8 :          IF (success /= ELPA_OK) &
     552            0 :             CPABORT("ELPA failed to diagonalize a matrix even without QR decomposition")
     553              : 
     554          680 :          IF (ANY(ABS(eval(1:neig) - eval_noqr(1:neig)) > th)) &
     555            0 :             CPABORT("ELPA failed to calculate Eigenvalues with ELPA's QR decomposition")
     556              : 
     557            8 :          DEALLOCATE (eval_noqr)
     558            8 :          CALL cp_fm_release(matrix_noqr)
     559            8 :          CALL cp_fm_release(eigenvectors_noqr)
     560              :       END IF
     561              : 
     562         8106 :       CALL elpa_deallocate(elpa_obj, success)
     563         8106 :       CPASSERT(success == ELPA_OK)
     564              : 
     565       909217 :       eigenvalues(1:neig) = eval(1:neig)
     566         8106 :       DEALLOCATE (eval)
     567              : 
     568         8106 :       CALL timestop(handle)
     569              : 
     570        32424 :    END SUBROUTINE cp_fm_diag_elpa_base
     571              : #endif
     572              : 
     573              : END MODULE cp_fm_elpa
        

Generated by: LCOV version 2.0-1