Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Wrapper for ELPA
10 : !> \author Ole Schuett
11 : ! **************************************************************************************************
12 : MODULE cp_fm_elpa
13 : USE cp_log_handling, ONLY: cp_to_string
14 : USE machine, ONLY: m_cpuid, &
15 : MACHINE_X86, &
16 : MACHINE_CPU_GENERIC, &
17 : MACHINE_X86_SSE4, &
18 : MACHINE_X86_AVX, &
19 : MACHINE_X86_AVX2
20 : USE cp_blacs_env, ONLY: cp_blacs_env_type
21 : USE cp_fm_basic_linalg, ONLY: cp_fm_uplo_to_full
22 : USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_start, &
23 : cp_fm_redistribute_end, &
24 : cp_fm_redistribute_info
25 : USE cp_fm_struct, ONLY: cp_fm_struct_get
26 : USE cp_fm_types, ONLY: cp_fm_type, &
27 : cp_fm_to_fm, &
28 : cp_fm_release, &
29 : cp_fm_create, &
30 : cp_fm_write_info
31 : USE cp_log_handling, ONLY: cp_get_default_logger, &
32 : cp_logger_get_default_io_unit, &
33 : cp_logger_type
34 : USE kinds, ONLY: default_string_length, dp
35 : USE message_passing, ONLY: mp_comm_type
36 : USE OMP_LIB, ONLY: omp_get_max_threads
37 : #if defined(__HAS_IEEE_EXCEPTIONS)
38 : USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
39 : ieee_set_halting_mode, &
40 : IEEE_ALL
41 : #endif
42 : #include "../base/base_uses.f90"
43 :
44 : #if defined(__ELPA)
45 : USE elpa_constants, ONLY: ELPA_SOLVER_1STAGE, ELPA_SOLVER_2STAGE, ELPA_OK, &
46 : ELPA_2STAGE_REAL_INVALID, &
47 : ELPA_2STAGE_REAL_DEFAULT, &
48 : ELPA_2STAGE_REAL_GENERIC, &
49 : ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
50 : ELPA_2STAGE_REAL_BGP, &
51 : ELPA_2STAGE_REAL_BGQ, &
52 : ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
53 : ELPA_2STAGE_REAL_SSE_BLOCK2, &
54 : ELPA_2STAGE_REAL_SSE_BLOCK4, &
55 : ELPA_2STAGE_REAL_SSE_BLOCK6, &
56 : ELPA_2STAGE_REAL_AVX_BLOCK2, &
57 : ELPA_2STAGE_REAL_AVX_BLOCK4, &
58 : ELPA_2STAGE_REAL_AVX_BLOCK6, &
59 : ELPA_2STAGE_REAL_AVX2_BLOCK2, &
60 : ELPA_2STAGE_REAL_AVX2_BLOCK4, &
61 : ELPA_2STAGE_REAL_AVX2_BLOCK6, &
62 : ELPA_2STAGE_REAL_AVX512_BLOCK2, &
63 : ELPA_2STAGE_REAL_AVX512_BLOCK4, &
64 : ELPA_2STAGE_REAL_AVX512_BLOCK6, &
65 : ELPA_2STAGE_REAL_NVIDIA_GPU, &
66 : ELPA_2STAGE_REAL_AMD_GPU, &
67 : ELPA_2STAGE_REAL_INTEL_GPU_SYCL
68 :
69 : USE elpa, ONLY: elpa_t, elpa_init, elpa_uninit, &
70 : elpa_allocate, elpa_deallocate
71 : #endif
72 :
73 : IMPLICIT NONE
74 :
75 : PRIVATE
76 :
77 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
78 :
79 : #if defined(__ELPA)
80 : INTEGER, DIMENSION(21), PARAMETER :: elpa_kernel_ids = [ &
81 : ELPA_2STAGE_REAL_INVALID, & ! auto
82 : ELPA_2STAGE_REAL_GENERIC, &
83 : ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
84 : ELPA_2STAGE_REAL_BGP, &
85 : ELPA_2STAGE_REAL_BGQ, &
86 : ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
87 : ELPA_2STAGE_REAL_SSE_BLOCK2, &
88 : ELPA_2STAGE_REAL_SSE_BLOCK4, &
89 : ELPA_2STAGE_REAL_SSE_BLOCK6, &
90 : ELPA_2STAGE_REAL_AVX_BLOCK2, &
91 : ELPA_2STAGE_REAL_AVX_BLOCK4, &
92 : ELPA_2STAGE_REAL_AVX_BLOCK6, &
93 : ELPA_2STAGE_REAL_AVX2_BLOCK2, &
94 : ELPA_2STAGE_REAL_AVX2_BLOCK4, &
95 : ELPA_2STAGE_REAL_AVX2_BLOCK6, &
96 : ELPA_2STAGE_REAL_AVX512_BLOCK2, &
97 : ELPA_2STAGE_REAL_AVX512_BLOCK4, &
98 : ELPA_2STAGE_REAL_AVX512_BLOCK6, &
99 : ELPA_2STAGE_REAL_NVIDIA_GPU, &
100 : ELPA_2STAGE_REAL_AMD_GPU, &
101 : ELPA_2STAGE_REAL_INTEL_GPU_SYCL]
102 :
103 : CHARACTER(len=14), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
104 : elpa_kernel_names = [CHARACTER(len=14) :: &
105 : "AUTO", &
106 : "GENERIC", &
107 : "GENERIC_SIMPLE", &
108 : "BGP", &
109 : "BGQ", &
110 : "SSE", &
111 : "SSE_BLOCK2", &
112 : "SSE_BLOCK4", &
113 : "SSE_BLOCK6", &
114 : "AVX_BLOCK2", &
115 : "AVX_BLOCK4", &
116 : "AVX_BLOCK6", &
117 : "AVX2_BLOCK2", &
118 : "AVX2_BLOCK4", &
119 : "AVX2_BLOCK6", &
120 : "AVX512_BLOCK2", &
121 : "AVX512_BLOCK4", &
122 : "AVX512_BLOCK6", &
123 : "NVIDIA_GPU", &
124 : "AMD_GPU", &
125 : "INTEL_GPU"]
126 :
127 : CHARACTER(len=44), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
128 : elpa_kernel_descriptions = [CHARACTER(len=44) :: &
129 : "Automatically selected kernel", &
130 : "Generic kernel", &
131 : "Simplified generic kernel", &
132 : "Kernel optimized for IBM BGP", &
133 : "Kernel optimized for IBM BGQ", &
134 : "Kernel optimized for x86_64/SSE", &
135 : "Kernel optimized for x86_64/SSE (block=2)", &
136 : "Kernel optimized for x86_64/SSE (block=4)", &
137 : "Kernel optimized for x86_64/SSE (block=6)", &
138 : "Kernel optimized for Intel AVX (block=2)", &
139 : "Kernel optimized for Intel AVX (block=4)", &
140 : "Kernel optimized for Intel AVX (block=6)", &
141 : "Kernel optimized for Intel AVX2 (block=2)", &
142 : "Kernel optimized for Intel AVX2 (block=4)", &
143 : "Kernel optimized for Intel AVX2 (block=6)", &
144 : "Kernel optimized for Intel AVX-512 (block=2)", &
145 : "Kernel optimized for Intel AVX-512 (block=4)", &
146 : "Kernel optimized for Intel AVX-512 (block=6)", &
147 : "Kernel targeting Nvidia GPUs", &
148 : "Kernel targeting AMD GPUs", &
149 : "Kernel targeting Intel GPUs"]
150 : #else
151 : INTEGER, DIMENSION(1), PARAMETER :: elpa_kernel_ids = [-1]
152 : CHARACTER(len=14), DIMENSION(1), PARAMETER :: elpa_kernel_names = ["AUTO"]
153 : CHARACTER(len=44), DIMENSION(1), PARAMETER :: elpa_kernel_descriptions = ["Automatically selected kernel"]
154 : #endif
155 :
156 : #if defined(__ELPA)
157 : INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto
158 : #endif
159 :
160 : ! elpa_qr_unsafe: disable block size limitations
161 : LOGICAL, SAVE :: elpa_qr_unsafe = .TRUE., &
162 : elpa_print = .FALSE., &
163 : elpa_qr = .FALSE.
164 :
165 : #if defined(__OFFLOAD_OPENCL)
166 : LOGICAL, SAVE :: elpa_one_stage = .TRUE.
167 : #else
168 : LOGICAL, SAVE :: elpa_one_stage = .FALSE.
169 : #endif
170 :
171 : PUBLIC :: cp_fm_diag_elpa, &
172 : set_elpa_kernel, &
173 : elpa_one_stage, &
174 : elpa_print, &
175 : elpa_qr, &
176 : elpa_kernel_ids, &
177 : elpa_kernel_names, &
178 : elpa_kernel_descriptions, &
179 : initialize_elpa_library, &
180 : finalize_elpa_library
181 :
182 : CONTAINS
183 :
184 : ! **************************************************************************************************
185 : !> \brief Initialize the ELPA library
186 : !> \param one_stage ...
187 : !> \param qr ...
188 : !> \param should_print flag that determines if additional information
189 : !> is printed when the diagonalization routine is called.
190 : ! **************************************************************************************************
191 9158 : SUBROUTINE initialize_elpa_library(one_stage, qr, should_print)
192 : LOGICAL, INTENT(IN), OPTIONAL :: one_stage, qr, should_print
193 :
194 : #if defined(__ELPA)
195 9158 : IF (elpa_init(20180525) /= ELPA_OK) &
196 0 : CPABORT("The linked ELPA library does not support the required API version")
197 9158 : IF (PRESENT(one_stage)) elpa_one_stage = one_stage
198 9158 : IF (PRESENT(should_print)) elpa_print = should_print
199 9158 : IF (PRESENT(qr)) elpa_qr = qr
200 : #else
201 : MARK_USED(one_stage)
202 : MARK_USED(qr)
203 : MARK_USED(should_print)
204 : CPABORT("Initialization of ELPA library requested but not enabled during build")
205 : #endif
206 9158 : END SUBROUTINE initialize_elpa_library
207 :
208 : ! **************************************************************************************************
209 : !> \brief Finalize the ELPA library
210 : ! **************************************************************************************************
211 9545 : SUBROUTINE finalize_elpa_library()
212 : #if defined(__ELPA)
213 9545 : CALL elpa_uninit()
214 : #else
215 : CPABORT("Finalization of ELPA library requested but not enabled during build")
216 : #endif
217 9545 : END SUBROUTINE finalize_elpa_library
218 :
219 : ! **************************************************************************************************
220 : !> \brief Sets the active ELPA kernel.
221 : !> \param requested_kernel one of the elpa_kernel_ids
222 : ! **************************************************************************************************
223 9158 : SUBROUTINE set_elpa_kernel(requested_kernel)
224 : INTEGER, INTENT(IN) :: requested_kernel
225 :
226 : #if defined(__ELPA)
227 : INTEGER :: cpuid
228 :
229 9158 : elpa_kernel = requested_kernel
230 :
231 : ! Resolve AUTO kernel.
232 9158 : IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
233 9154 : cpuid = m_cpuid()
234 9154 : IF ((MACHINE_CPU_GENERIC < cpuid) .AND. (cpuid <= MACHINE_X86)) THEN
235 0 : SELECT CASE (cpuid)
236 : CASE (MACHINE_X86_SSE4)
237 0 : elpa_kernel = ELPA_2STAGE_REAL_SSE_BLOCK4
238 : CASE (MACHINE_X86_AVX)
239 0 : elpa_kernel = ELPA_2STAGE_REAL_AVX_BLOCK4
240 : CASE (MACHINE_X86_AVX2)
241 9154 : elpa_kernel = ELPA_2STAGE_REAL_AVX2_BLOCK4
242 : CASE DEFAULT
243 9154 : elpa_kernel = ELPA_2STAGE_REAL_AVX512_BLOCK4
244 : END SELECT
245 : END IF
246 :
247 : ! Prefer GPU kernel if available.
248 : #if !defined(__NO_OFFLOAD_ELPA)
249 : #if defined(__OFFLOAD_CUDA)
250 : elpa_kernel = ELPA_2STAGE_REAL_NVIDIA_GPU
251 : #endif
252 : #if defined(__OFFLOAD_HIP)
253 : elpa_kernel = ELPA_2STAGE_REAL_AMD_GPU
254 : #endif
255 : #if defined(__OFFLOAD_OPENCL)
256 : elpa_kernel = ELPA_2STAGE_REAL_INTEL_GPU_SYCL
257 : #endif
258 : #endif
259 : ! If we could not find a suitable kernel then use ELPA_2STAGE_REAL_DEFAULT.
260 9154 : IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
261 0 : elpa_kernel = ELPA_2STAGE_REAL_DEFAULT
262 : END IF
263 : END IF
264 : #else
265 : MARK_USED(requested_kernel)
266 : #endif
267 9158 : END SUBROUTINE set_elpa_kernel
268 :
269 : ! **************************************************************************************************
270 : !> \brief Driver routine to diagonalize a FM matrix with the ELPA library.
271 : !> \param matrix the matrix that is diagonalized
272 : !> \param eigenvectors eigenvectors of the input matrix
273 : !> \param eigenvalues eigenvalues of the input matrix
274 : ! **************************************************************************************************
275 11144 : SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
276 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
277 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
278 :
279 : #if defined(__ELPA)
280 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa'
281 :
282 : INTEGER :: handle
283 : TYPE(cp_fm_type) :: eigenvectors_new, matrix_new
284 : TYPE(cp_fm_redistribute_info) :: rdinfo
285 :
286 11144 : CALL timeset(routineN, handle)
287 :
288 : ! Determine if the input matrix needs to be redistributed before diagonalization.
289 : ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
290 : ! The redistributed matrix is stored in matrix_new, which is just a pointer
291 : ! to the original matrix if no redistribution is required.
292 : ! With ELPA, we have to make sure that all processor columns have nonzero width
293 : CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
294 11144 : caller_is_elpa=.TRUE., redist_info=rdinfo)
295 :
296 : ! Call ELPA on CPUs that hold the new matrix
297 11144 : IF (ASSOCIATED(matrix_new%matrix_struct)) &
298 8106 : CALL cp_fm_diag_elpa_base(matrix_new, eigenvectors_new, eigenvalues, rdinfo)
299 :
300 : ! Redistribute results and clean up
301 11144 : CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_new, eigenvectors_new)
302 :
303 11144 : CALL timestop(handle)
304 : #else
305 : eigenvalues = 0
306 : MARK_USED(matrix)
307 : MARK_USED(eigenvectors)
308 :
309 : CPABORT("CP2K compiled without the ELPA library.")
310 : #endif
311 11144 : END SUBROUTINE cp_fm_diag_elpa
312 :
313 : #if defined(__ELPA)
314 : ! **************************************************************************************************
315 : !> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
316 : !> \param matrix the matrix that is diagonalized
317 : !> \param eigenvectors eigenvectors of the input matrix
318 : !> \param eigenvalues eigenvalues of the input matrix
319 : !> \param rdinfo ...
320 : ! **************************************************************************************************
321 8106 : SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)
322 :
323 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
324 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
325 : TYPE(cp_fm_redistribute_info), INTENT(IN) :: rdinfo
326 :
327 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa_base'
328 :
329 : INTEGER :: handle
330 :
331 : CLASS(elpa_t), POINTER :: elpa_obj
332 : CHARACTER(len=default_string_length) :: kernel_name
333 : TYPE(mp_comm_type) :: group
334 : INTEGER :: i, &
335 : mypcol, myprow, n, &
336 : n_rows, n_cols, &
337 : nblk, neig, io_unit, &
338 : success
339 : LOGICAL :: use_qr, check_eigenvalues
340 8106 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eval, eval_noqr
341 : TYPE(cp_blacs_env_type), POINTER :: context
342 : TYPE(cp_fm_type) :: matrix_noqr, eigenvectors_noqr
343 : TYPE(cp_logger_type), POINTER :: logger
344 : REAL(KIND=dp), PARAMETER :: th = 1.0E-14_dp
345 8106 : INTEGER, DIMENSION(:), POINTER :: ncol_locals
346 : #if defined(__HAS_IEEE_EXCEPTIONS)
347 : LOGICAL, DIMENSION(5) :: halt
348 : #endif
349 :
350 8106 : CALL timeset(routineN, handle)
351 8106 : NULLIFY (logger)
352 8106 : NULLIFY (ncol_locals)
353 :
354 8106 : check_eigenvalues = .FALSE.
355 :
356 8106 : logger => cp_get_default_logger()
357 8106 : io_unit = cp_logger_get_default_io_unit(logger)
358 :
359 8106 : n = matrix%matrix_struct%nrow_global
360 8106 : context => matrix%matrix_struct%context
361 8106 : group = matrix%matrix_struct%para_env
362 :
363 8106 : myprow = context%mepos(1)
364 8106 : mypcol = context%mepos(2)
365 :
366 : ! elpa needs the full matrix
367 8106 : CALL cp_fm_uplo_to_full(matrix, eigenvectors)
368 :
369 : CALL cp_fm_struct_get(matrix%matrix_struct, &
370 : local_leading_dimension=n_rows, &
371 : ncol_local=n_cols, &
372 : nrow_block=nblk, &
373 8106 : ncol_locals=ncol_locals)
374 :
375 : ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
376 16212 : IF (io_unit > 0 .AND. ANY(ncol_locals == 0)) THEN
377 0 : CALL rdinfo%write(io_unit)
378 0 : CALL cp_fm_write_info(matrix, io_unit)
379 0 : CPABORT("ELPA [pre-fail]: Problem contains processor column with zero width.")
380 : END IF
381 :
382 8106 : neig = SIZE(eigenvalues, 1)
383 : ! Decide if matrix is suitable for ELPA to use QR
384 : ! The definition of what is considered a suitable matrix depends on the ELPA version
385 : ! The relevant ELPA files to check are
386 : ! - Proper matrix order: src/elpa2/elpa2_template.F90
387 : ! - Proper block size: test/Fortran/test.F90
388 : ! Note that the names of these files might change in different ELPA versions
389 : ! Matrix order must be even
390 8106 : use_qr = elpa_qr .AND. (MODULO(n, 2) == 0)
391 : ! Matrix order and block size must be greater than or equal to 64
392 8106 : IF (.NOT. elpa_qr_unsafe) &
393 0 : use_qr = use_qr .AND. (n >= 64) .AND. (nblk >= 64)
394 :
395 : ! Check if eigenvalues computed with elpa_qr_unsafe should be verified
396 8106 : IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_print) &
397 8 : check_eigenvalues = .TRUE.
398 :
399 8106 : CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)
400 :
401 8106 : IF (check_eigenvalues) THEN
402 : ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
403 24 : ALLOCATE (eval_noqr(n))
404 8 : CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
405 8 : CALL cp_fm_to_fm(matrix, matrix_noqr)
406 8 : CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
407 16 : CALL cp_fm_uplo_to_full(matrix_noqr, eigenvectors_noqr)
408 : END IF
409 :
410 8106 : IF (io_unit > 0 .AND. elpa_print) THEN
411 : WRITE (UNIT=io_unit, FMT="(/,T2,A)") &
412 6 : "ELPA| Matrix diagonalization information"
413 :
414 : ! Find name for given kernel id.
415 : ! In case of ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list.
416 6 : kernel_name = "id: "//TRIM(ADJUSTL(cp_to_string(elpa_kernel)))
417 132 : DO i = 1, SIZE(elpa_kernel_ids)
418 132 : IF (elpa_kernel_ids(i) == elpa_kernel) THEN
419 6 : kernel_name = elpa_kernel_names(i)
420 : END IF
421 : END DO
422 :
423 : WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
424 6 : "ELPA| Matrix order (NA) ", n, &
425 6 : "ELPA| Matrix block size (NBLK) ", nblk, &
426 6 : "ELPA| Number of eigenvectors (NEV) ", neig, &
427 6 : "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
428 12 : "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
429 : WRITE (UNIT=io_unit, FMT="(T2,A,T61,A20)") &
430 6 : "ELPA| Kernel ", ADJUSTR(TRIM(kernel_name))
431 6 : IF (elpa_qr) THEN
432 : WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
433 4 : "ELPA| QR step requested ", "YES"
434 : ELSE
435 : WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
436 2 : "ELPA| QR step requested ", "NO"
437 : END IF
438 :
439 6 : IF (elpa_qr) THEN
440 4 : IF (use_qr) THEN
441 : WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
442 4 : "ELPA| Matrix is suitable for QR ", "YES"
443 : ELSE
444 : WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
445 0 : "ELPA| Matrix is suitable for QR ", "NO"
446 : END IF
447 4 : IF (.NOT. use_qr) THEN
448 0 : IF (MODULO(n, 2) /= 0) THEN
449 : WRITE (UNIT=io_unit, FMT="(T2,A)") &
450 0 : "ELPA| Matrix order is NOT even"
451 : END IF
452 0 : IF ((nblk < 64) .AND. (.NOT. elpa_qr_unsafe)) THEN
453 : WRITE (UNIT=io_unit, FMT="(T2,A)") &
454 0 : "ELPA| Matrix block size is NOT 64 or greater"
455 : END IF
456 : ELSE
457 4 : IF ((nblk < 64) .AND. elpa_qr_unsafe) THEN
458 : WRITE (UNIT=io_unit, FMT="(T2,A)") &
459 2 : "ELPA| Matrix block size check was bypassed"
460 : END IF
461 : END IF
462 : END IF
463 : END IF
464 :
465 : ! the full eigenvalues vector is needed
466 24318 : ALLOCATE (eval(n))
467 :
468 8106 : elpa_obj => elpa_allocate()
469 :
470 8106 : CALL elpa_obj%set("na", n, success)
471 8106 : CPASSERT(success == ELPA_OK)
472 :
473 8106 : CALL elpa_obj%set("nev", neig, success)
474 8106 : CPASSERT(success == ELPA_OK)
475 :
476 8106 : CALL elpa_obj%set("local_nrows", n_rows, success)
477 8106 : CPASSERT(success == ELPA_OK)
478 :
479 8106 : CALL elpa_obj%set("local_ncols", n_cols, success)
480 8106 : CPASSERT(success == ELPA_OK)
481 :
482 8106 : CALL elpa_obj%set("nblk", nblk, success)
483 8106 : CPASSERT(success == ELPA_OK)
484 :
485 8106 : CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
486 8106 : CPASSERT(success == ELPA_OK)
487 :
488 8106 : CALL elpa_obj%set("process_row", myprow, success)
489 8106 : CPASSERT(success == ELPA_OK)
490 :
491 8106 : CALL elpa_obj%set("process_col", mypcol, success)
492 8106 : CPASSERT(success == ELPA_OK)
493 :
494 8106 : success = elpa_obj%setup()
495 8106 : CPASSERT(success == ELPA_OK)
496 :
497 : CALL elpa_obj%set("solver", &
498 : MERGE(ELPA_SOLVER_1STAGE, ELPA_SOLVER_2STAGE, elpa_one_stage), &
499 16208 : success)
500 8106 : IF (success /= ELPA_OK) &
501 0 : CPABORT("Setting solver for ELPA failed")
502 :
503 : ! enabling the GPU must happen before setting the kernel
504 0 : SELECT CASE (elpa_kernel)
505 : CASE (ELPA_2STAGE_REAL_NVIDIA_GPU)
506 0 : CALL elpa_obj%set("nvidia-gpu", 1, success)
507 0 : CPASSERT(success == ELPA_OK)
508 : CASE (ELPA_2STAGE_REAL_AMD_GPU)
509 0 : CALL elpa_obj%set("amd-gpu", 1, success)
510 0 : CPASSERT(success == ELPA_OK)
511 : CASE (ELPA_2STAGE_REAL_INTEL_GPU_SYCL)
512 0 : CALL elpa_obj%set("intel-gpu", 1, success)
513 8106 : CPASSERT(success == ELPA_OK)
514 : END SELECT
515 :
516 8106 : IF (.NOT. elpa_one_stage) THEN
517 8102 : CALL elpa_obj%set("real_kernel", elpa_kernel, success)
518 8102 : CPWARN_IF(success /= ELPA_OK, "Setting real_kernel for ELPA failed")
519 :
520 8102 : IF (use_qr) THEN
521 8 : CALL elpa_obj%set("qr", 1, success)
522 8 : CPASSERT(success == ELPA_OK)
523 : END IF
524 : END IF
525 :
526 : ! Set number of threads only when ELPA was built with OpenMP support.
527 8106 : IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == ELPA_OK) THEN
528 8106 : CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
529 8106 : CPASSERT(success == ELPA_OK)
530 : END IF
531 :
532 : ! ELPA solver: calculate the Eigenvalues/vectors
533 : #if defined(__HAS_IEEE_EXCEPTIONS)
534 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
535 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
536 : #endif
537 8106 : CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
538 : #if defined(__HAS_IEEE_EXCEPTIONS)
539 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
540 : #endif
541 :
542 8106 : IF (success /= ELPA_OK) &
543 0 : CPABORT("ELPA failed to diagonalize a matrix")
544 :
545 8106 : IF (check_eigenvalues) THEN
546 : ! run again without QR
547 8 : CALL elpa_obj%set("qr", 0, success)
548 8 : CPASSERT(success == ELPA_OK)
549 :
550 8 : CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
551 8 : IF (success /= ELPA_OK) &
552 0 : CPABORT("ELPA failed to diagonalize a matrix even without QR decomposition")
553 :
554 680 : IF (ANY(ABS(eval(1:neig) - eval_noqr(1:neig)) > th)) &
555 0 : CPABORT("ELPA failed to calculate Eigenvalues with ELPA's QR decomposition")
556 :
557 8 : DEALLOCATE (eval_noqr)
558 8 : CALL cp_fm_release(matrix_noqr)
559 8 : CALL cp_fm_release(eigenvectors_noqr)
560 : END IF
561 :
562 8106 : CALL elpa_deallocate(elpa_obj, success)
563 8106 : CPASSERT(success == ELPA_OK)
564 :
565 909217 : eigenvalues(1:neig) = eval(1:neig)
566 8106 : DEALLOCATE (eval)
567 :
568 8106 : CALL timestop(handle)
569 :
570 32424 : END SUBROUTINE cp_fm_diag_elpa_base
571 : #endif
572 :
573 : END MODULE cp_fm_elpa
|