Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief used for collecting some of the diagonalization schemes available for
10 : !> cp_fm_type. cp_fm_power also moved here as it is very related
11 : !> \note
12 : !> first version : most routines imported
13 : !> \par History
14 : !> - unused Jacobi routines removed, cosmetics (05.04.06,MK)
15 : !> \author Joost VandeVondele (2003-08)
16 : ! **************************************************************************************************
17 : MODULE cp_fm_diag
18 : USE cp_blacs_types, ONLY: cp_blacs_type
19 : USE cp_blacs_env, ONLY: cp_blacs_env_type
20 : USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale, &
21 : cp_fm_gemm, &
22 : cp_fm_scale, &
23 : cp_fm_syrk, &
24 : cp_fm_triangular_invert, &
25 : cp_fm_triangular_multiply, &
26 : cp_fm_uplo_to_full
27 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
28 : USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_end, &
29 : cp_fm_redistribute_start
30 : USE cp_fm_elpa, ONLY: cp_fm_diag_elpa, &
31 : finalize_elpa_library, &
32 : initialize_elpa_library, &
33 : set_elpa_kernel
34 : USE cp_fm_cusolver_api, ONLY: cp_fm_diag_cusolver, &
35 : cp_fm_general_cusolver
36 : #if defined(__DLAF)
37 : USE cp_fm_dlaf_api, ONLY: cp_fm_diag_dlaf, cp_fm_diag_gen_dlaf
38 : USE cp_dlaf_utils_api, ONLY: cp_dlaf_initialize, cp_dlaf_finalize
39 : #endif
40 : USE cp_fm_types, ONLY: cp_fm_get_info, &
41 : cp_fm_set_element, &
42 : cp_fm_to_fm, &
43 : cp_fm_type, &
44 : cp_fm_create, &
45 : cp_fm_get_info, &
46 : cp_fm_release, &
47 : cp_fm_set_all, &
48 : cp_fm_to_fm, &
49 : cp_fm_to_fm_submat, &
50 : cp_fm_type
51 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent, &
52 : cp_fm_struct_create, &
53 : cp_fm_struct_release, &
54 : cp_fm_struct_type
55 : USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
56 : cp_get_default_logger, &
57 : cp_logger_get_default_io_unit, &
58 : cp_logger_type, &
59 : cp_to_string
60 : USE cp_log_handling, ONLY: cp_get_default_logger, &
61 : cp_logger_get_default_unit_nr, &
62 : cp_logger_get_unit_nr, &
63 : cp_logger_type
64 : USE kinds, ONLY: default_string_length, &
65 : dp
66 : USE machine, ONLY: default_output_unit, &
67 : m_memory
68 : USE parallel_gemm_api, ONLY: parallel_gemm
69 : #if defined (__parallel)
70 : USE message_passing, ONLY: mp_comm_type
71 : #endif
72 : #if defined (__HAS_IEEE_EXCEPTIONS)
73 : USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
74 : ieee_set_halting_mode, &
75 : IEEE_ALL
76 : #endif
77 : #include "../base/base_uses.f90"
78 :
79 : IMPLICIT NONE
80 : PRIVATE
81 :
82 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag'
83 :
84 : REAL(KIND=dp), PARAMETER, PUBLIC :: eps_check_diag_default = 5.0E-14_dp
85 :
86 : ! The following saved variables are diagonalization global
87 : ! Stores the default library for diagonalization
88 : INTEGER, SAVE, PUBLIC :: diag_type = 0
89 : ! Minimum number of eigenvectors for the use of the ELPA eigensolver.
90 : ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
91 : INTEGER, SAVE :: elpa_neigvec_min = 0
92 : ! Minimum matrix size for the use of the cuSOLVERMp eigensolver.
93 : ! Smaller matrices use the ScaLAPACK fallback to avoid GPU launch overheads.
94 : INTEGER, PARAMETER, PUBLIC :: cusolver_n_min = 64
95 : #if defined(__DLAF)
96 : ! Minimum number of eigenvectors for the use of the DLAF eigensolver.
97 : ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
98 : INTEGER, SAVE, PUBLIC :: dlaf_neigvec_min = 0
99 : #endif
100 : LOGICAL, SAVE, PUBLIC :: direct_generalized_diagonalization = .FALSE.
101 : ! Threshold value for the orthonormality check of the eigenvectors obtained
102 : ! after a diagonalization. A negative value disables the check.
103 : REAL(KIND=dp), SAVE :: eps_check_diag = -1.0_dp
104 :
105 : ! Constants for the diag_type above
106 : INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_SCALAPACK = 101, &
107 : FM_DIAG_TYPE_ELPA = 102, &
108 : FM_DIAG_TYPE_CUSOLVER = 103, &
109 : FM_DIAG_TYPE_DLAF = 104
110 : #if defined(__CUSOLVERMP)
111 : INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_CUSOLVER
112 : #elif defined(__ELPA)
113 : INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_ELPA
114 : #else
115 : INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_SCALAPACK
116 : #endif
117 :
118 : ! Public subroutines
119 : PUBLIC :: choose_eigv_solver, &
120 : cp_fm_block_jacobi, &
121 : cp_fm_power, &
122 : cp_fm_syevd, &
123 : cp_fm_syevx, &
124 : cp_fm_svd, &
125 : cp_fm_geeig, &
126 : cp_fm_geeig_canon, &
127 : diag_check_requested, &
128 : diag_check_warning_threshold, &
129 : diag_init, &
130 : diag_finalize
131 :
132 : CONTAINS
133 :
134 : ! **************************************************************************************************
135 : !> \brief Setup the diagonalization library to be used
136 : !> \param diag_lib diag_library flag from GLOBAL section in input
137 : !> \param fallback_applied .TRUE. if support for the requested library was not compiled-in and fallback
138 : !> to ScaLAPACK was applied, .FALSE. otherwise.
139 : !> \param elpa_kernel integer that determines which ELPA kernel to use for diagonalization
140 : !> \param elpa_neigvec_min_input ...
141 : !> \param elpa_qr logical that determines if ELPA should try to use QR to accelerate the
142 : !> diagonalization procedure of suitably sized matrices
143 : !> \param elpa_print logical that determines if information about the ELPA diagonalization should
144 : !> be printed
145 : !> \param elpa_one_stage logical that enables the one-stage solver
146 : !> \param dlaf_neigvec_min_input ...
147 : !> \param eps_check_diag_input ...
148 : !> \param direct_generalized_diagonalization_input ...
149 : !> \par History
150 : !> - Add support for DLA-Future (05.09.2023, RMeli)
151 : !> \author MI 11.2013
152 : ! **************************************************************************************************
153 10420 : SUBROUTINE diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, &
154 : elpa_print, elpa_one_stage, dlaf_neigvec_min_input, eps_check_diag_input, &
155 : direct_generalized_diagonalization_input)
156 : CHARACTER(LEN=*), INTENT(IN) :: diag_lib
157 : LOGICAL, INTENT(OUT) :: fallback_applied
158 : INTEGER, INTENT(IN) :: elpa_kernel, elpa_neigvec_min_input
159 : LOGICAL, INTENT(IN) :: elpa_qr, elpa_print, elpa_one_stage
160 : INTEGER, INTENT(IN) :: dlaf_neigvec_min_input
161 : REAL(KIND=dp), INTENT(IN) :: eps_check_diag_input
162 : LOGICAL, INTENT(IN), OPTIONAL :: direct_generalized_diagonalization_input
163 :
164 : LOGICAL, SAVE :: initialized = .FALSE.
165 :
166 10420 : fallback_applied = .FALSE.
167 :
168 10420 : IF (diag_lib == "ScaLAPACK") THEN
169 198 : diag_type = FM_DIAG_TYPE_SCALAPACK
170 10222 : ELSE IF (diag_lib == "ELPA") THEN
171 : #if defined (__ELPA)
172 : ! ELPA is requested and available
173 10222 : diag_type = FM_DIAG_TYPE_ELPA
174 : #else
175 : ! ELPA library requested but not linked, switch back to SL
176 : diag_type = FM_DIAG_TYPE_SCALAPACK
177 : fallback_applied = .TRUE.
178 : #endif
179 0 : ELSE IF (diag_lib == "cuSOLVER") THEN
180 0 : diag_type = FM_DIAG_TYPE_CUSOLVER
181 0 : ELSE IF (diag_lib == "DLAF") THEN
182 : #if defined (__DLAF)
183 : diag_type = FM_DIAG_TYPE_DLAF
184 : #else
185 0 : CPABORT("ERROR in diag_init: CP2K was not compiled with DLA-Future support")
186 : #endif
187 : ELSE
188 0 : CPABORT("ERROR in diag_init: Initialization of unknown diagonalization library requested")
189 : END IF
190 :
191 : ! Initialization of requested diagonalization library
192 10420 : IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_ELPA) THEN
193 9625 : CALL initialize_elpa_library(one_stage=elpa_one_stage, qr=elpa_qr, should_print=elpa_print)
194 9625 : CALL set_elpa_kernel(elpa_kernel)
195 9625 : initialized = .TRUE.
196 : END IF
197 : #if defined(__DLAF)
198 : IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_DLAF) THEN
199 : CALL cp_dlaf_initialize()
200 : initialized = .TRUE.
201 : END IF
202 : dlaf_neigvec_min = dlaf_neigvec_min_input
203 : #else
204 : MARK_USED(dlaf_neigvec_min_input)
205 : #endif
206 :
207 10420 : elpa_neigvec_min = elpa_neigvec_min_input
208 10420 : eps_check_diag = eps_check_diag_input
209 10420 : IF (PRESENT(direct_generalized_diagonalization_input)) THEN
210 10420 : direct_generalized_diagonalization = direct_generalized_diagonalization_input
211 : ELSE
212 0 : direct_generalized_diagonalization = .FALSE.
213 : END IF
214 :
215 10420 : END SUBROUTINE diag_init
216 :
217 : ! **************************************************************************************************
218 : !> \brief Finalize the diagonalization library
219 : ! **************************************************************************************************
220 10209 : SUBROUTINE diag_finalize()
221 : #if defined (__ELPA)
222 10209 : IF (diag_type == FM_DIAG_TYPE_ELPA) &
223 10011 : CALL finalize_elpa_library()
224 : #endif
225 : #if defined (__DLAF)
226 : IF (diag_type == FM_DIAG_TYPE_DLAF) &
227 : CALL cp_dlaf_finalize()
228 : #endif
229 10209 : END SUBROUTINE diag_finalize
230 :
231 : ! **************************************************************************************************
232 : !> \brief Choose the Eigensolver depending on which library is available
233 : !> ELPA seems to be unstable for small systems
234 : !> \param matrix ...
235 : !> \param eigenvectors ...
236 : !> \param eigenvalues ...
237 : !> \param info ...
238 : !> \par info If present returns error code and prevents program stops.
239 : !> Works currently only for cp_fm_syevd with ScaLAPACK.
240 : !> Other solvers will end the program regardless of PRESENT(info).
241 : !> \par History
242 : !> - Do not use ELPA for small matrices and use instead ScaLAPACK as fallback (10.05.2021, MK)
243 : ! **************************************************************************************************
244 201348 : SUBROUTINE choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
245 :
246 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
247 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
248 : INTEGER, INTENT(OUT), OPTIONAL :: info
249 :
250 : CHARACTER(LEN=*), PARAMETER :: routineN = 'choose_eigv_solver'
251 :
252 : ! Sample peak memory
253 201348 : CALL m_memory()
254 :
255 201348 : IF (PRESENT(info)) info = 0 ! Default for solvers that do not return an info.
256 :
257 201348 : IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
258 4560 : CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
259 :
260 196788 : ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
261 196788 : IF (matrix%matrix_struct%nrow_global < elpa_neigvec_min) THEN
262 : ! We don't trust ELPA with very small matrices.
263 180316 : CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
264 : ELSE
265 16472 : CALL cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
266 : END IF
267 :
268 0 : ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
269 0 : IF (matrix%matrix_struct%nrow_global < cusolver_n_min) THEN
270 : ! We don't trust cuSolver with very small matrices.
271 0 : CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
272 : ELSE
273 0 : CALL cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
274 : END IF
275 :
276 : #if defined(__DLAF)
277 : ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
278 : IF (matrix%matrix_struct%nrow_global < dlaf_neigvec_min) THEN
279 : ! Use ScaLAPACK for small matrices
280 : CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
281 : ELSE
282 : CALL cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
283 : END IF
284 : #endif
285 :
286 : ELSE
287 0 : CPABORT("ERROR in "//routineN//": Invalid diagonalization type requested")
288 : END IF
289 :
290 201348 : CALL check_diag(matrix, eigenvectors, nvec=SIZE(eigenvalues))
291 :
292 201348 : END SUBROUTINE choose_eigv_solver
293 :
294 : ! **************************************************************************************************
295 : !> \brief Return whether diagonalization checks should be performed.
296 : !> \return ...
297 : ! **************************************************************************************************
298 424308 : FUNCTION diag_check_requested() RESULT(check_requested)
299 : LOGICAL :: check_requested
300 :
301 : #if defined(__CHECK_DIAG)
302 : check_requested = .TRUE.
303 : #else
304 424308 : check_requested = eps_check_diag >= 0.0_dp
305 : #endif
306 :
307 424308 : END FUNCTION diag_check_requested
308 :
309 : ! **************************************************************************************************
310 : !> \brief Return the warning threshold for diagonalization checks.
311 : !> \return ...
312 : ! **************************************************************************************************
313 389608 : FUNCTION diag_check_warning_threshold() RESULT(eps_warning)
314 : REAL(KIND=dp) :: eps_warning
315 :
316 389608 : eps_warning = eps_check_diag_default
317 389608 : IF (eps_check_diag >= 0.0_dp) THEN
318 354 : eps_warning = eps_check_diag
319 : END IF
320 :
321 389608 : END FUNCTION diag_check_warning_threshold
322 :
323 : ! **************************************************************************************************
324 : !> \brief Check result of diagonalization, i.e. the orthonormality of the eigenvectors
325 : !> \param matrix Work matrix
326 : !> \param eigenvectors Eigenvectors to be checked
327 : !> \param nvec ...
328 : ! **************************************************************************************************
329 389590 : SUBROUTINE check_diag(matrix, eigenvectors, nvec)
330 :
331 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
332 : INTEGER, INTENT(IN) :: nvec
333 :
334 : CHARACTER(LEN=*), PARAMETER :: routineN = 'check_diag'
335 :
336 : CHARACTER(LEN=default_string_length) :: diag_type_name
337 : REAL(KIND=dp) :: eps, eps_abort, eps_warning, gold, test
338 : INTEGER :: handle, i, j, ncol, nrow, output_unit
339 : LOGICAL :: check_eigenvectors
340 : #if defined(__parallel)
341 : TYPE(cp_blacs_env_type), POINTER :: context
342 : INTEGER :: il, jl, ipcol, iprow, &
343 : mypcol, myprow, npcol, nprow
344 : INTEGER, DIMENSION(9) :: desca
345 : #endif
346 :
347 389590 : CALL timeset(routineN, handle)
348 :
349 389590 : output_unit = default_output_unit
350 389590 : check_eigenvectors = diag_check_requested()
351 389590 : eps_warning = diag_check_warning_threshold()
352 389590 : eps_abort = 10.0_dp*eps_warning
353 :
354 389590 : gold = 0.0_dp
355 389590 : test = 0.0_dp
356 389590 : eps = 0.0_dp
357 :
358 389590 : IF (check_eigenvectors) THEN
359 : #if defined(__parallel)
360 336 : nrow = eigenvectors%matrix_struct%nrow_global
361 336 : ncol = MIN(eigenvectors%matrix_struct%ncol_global, nvec)
362 336 : CALL cp_fm_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, eigenvectors, 0.0_dp, matrix)
363 336 : context => matrix%matrix_struct%context
364 336 : myprow = context%mepos(1)
365 336 : mypcol = context%mepos(2)
366 336 : nprow = context%num_pe(1)
367 336 : npcol = context%num_pe(2)
368 3360 : desca(:) = matrix%matrix_struct%descriptor(:)
369 6044 : outer: DO j = 1, ncol
370 250176 : DO i = 1, ncol
371 244132 : CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
372 249840 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
373 122066 : gold = MERGE(0.0_dp, 1.0_dp, i /= j)
374 122066 : test = matrix%local_data(il, jl)
375 122066 : eps = ABS(test - gold)
376 122066 : IF (eps > eps_warning) EXIT outer
377 : END IF
378 : END DO
379 : END DO outer
380 : #else
381 : nrow = SIZE(eigenvectors%local_data, 1)
382 : ncol = MIN(SIZE(eigenvectors%local_data, 2), nvec)
383 : CALL dgemm("T", "N", ncol, ncol, nrow, 1.0_dp, &
384 : eigenvectors%local_data(1, 1), nrow, &
385 : eigenvectors%local_data(1, 1), nrow, &
386 : 0.0_dp, matrix%local_data(1, 1), nrow)
387 : outer: DO j = 1, ncol
388 : DO i = 1, ncol
389 : gold = MERGE(0.0_dp, 1.0_dp, i /= j)
390 : test = matrix%local_data(i, j)
391 : eps = ABS(test - gold)
392 : IF (eps > eps_warning) EXIT outer
393 : END DO
394 : END DO outer
395 : #endif
396 336 : IF (eps > eps_warning) THEN
397 0 : IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
398 0 : diag_type_name = "SYEVD"
399 0 : ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
400 0 : diag_type_name = "ELPA"
401 0 : ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
402 0 : diag_type_name = "CUSOLVER"
403 0 : ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
404 0 : diag_type_name = "DLAF"
405 : ELSE
406 0 : CPABORT("Unknown diag_type")
407 : END IF
408 : WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,F0.0,A,ES10.3)") &
409 0 : "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
410 0 : "Matrix element (", i, ", ", j, ") = ", test, &
411 0 : "The deviation from the expected value ", gold, " is", eps
412 0 : IF (eps > eps_abort) THEN
413 0 : CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
414 : ELSE
415 0 : CPWARN("Check of matrix diagonalization failed in routine "//routineN)
416 : END IF
417 : END IF
418 : END IF
419 :
420 389590 : CALL timestop(handle)
421 :
422 389590 : END SUBROUTINE check_diag
423 :
424 : ! **************************************************************************************************
425 : !> \brief Check C^T*S*C = I for a generalized eigenvalue problem.
426 : !> \param overlap original overlap matrix S; used as work matrix and overwritten
427 : !> \param eigenvectors eigenvectors C to be checked
428 : !> \param scratch work matrix
429 : !> \param nvec ...
430 : ! **************************************************************************************************
431 2 : SUBROUTINE check_generalized_diag(overlap, eigenvectors, scratch, nvec)
432 :
433 : TYPE(cp_fm_type), INTENT(IN) :: eigenvectors
434 : TYPE(cp_fm_type), INTENT(INOUT) :: overlap, scratch
435 : INTEGER, INTENT(IN) :: nvec
436 :
437 : CHARACTER(LEN=*), PARAMETER :: routineN = 'check_generalized_diag'
438 :
439 : CHARACTER(LEN=default_string_length) :: diag_type_name
440 : REAL(KIND=dp) :: eps, eps_abort, eps_warning, gold, test
441 : INTEGER :: handle, i, j, ncol, nrow, output_unit
442 : #if defined(__parallel)
443 : TYPE(cp_blacs_env_type), POINTER :: context
444 : INTEGER :: il, jl, ipcol, iprow, &
445 : mypcol, myprow, npcol, nprow
446 : INTEGER, DIMENSION(9) :: desca
447 : #endif
448 :
449 2 : CALL timeset(routineN, handle)
450 :
451 2 : IF (.NOT. diag_check_requested()) THEN
452 0 : CALL timestop(handle)
453 0 : RETURN
454 : END IF
455 :
456 2 : output_unit = default_output_unit
457 2 : eps_warning = diag_check_warning_threshold()
458 2 : eps_abort = 10.0_dp*eps_warning
459 :
460 2 : nrow = eigenvectors%matrix_struct%nrow_global
461 2 : ncol = MIN(eigenvectors%matrix_struct%ncol_global, nvec)
462 :
463 2 : CALL parallel_gemm("N", "N", nrow, ncol, nrow, 1.0_dp, overlap, eigenvectors, 0.0_dp, scratch)
464 2 : CALL parallel_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, scratch, 0.0_dp, overlap)
465 :
466 2 : gold = 0.0_dp
467 2 : test = 0.0_dp
468 2 : eps = 0.0_dp
469 :
470 : #if defined(__parallel)
471 2 : context => overlap%matrix_struct%context
472 2 : myprow = context%mepos(1)
473 2 : mypcol = context%mepos(2)
474 2 : nprow = context%num_pe(1)
475 2 : npcol = context%num_pe(2)
476 20 : desca(:) = overlap%matrix_struct%descriptor(:)
477 18 : outer: DO j = 1, ncol
478 276 : DO i = 1, ncol
479 258 : CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
480 274 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
481 2 : gold = MERGE(0.0_dp, 1.0_dp, i /= j)
482 2 : test = overlap%local_data(il, jl)
483 2 : eps = ABS(test - gold)
484 2 : IF (eps > eps_warning) EXIT outer
485 : END IF
486 : END DO
487 : END DO outer
488 : #else
489 : outer: DO j = 1, ncol
490 : DO i = 1, ncol
491 : gold = MERGE(0.0_dp, 1.0_dp, i /= j)
492 : test = overlap%local_data(i, j)
493 : eps = ABS(test - gold)
494 : IF (eps > eps_warning) EXIT outer
495 : END DO
496 : END DO outer
497 : #endif
498 :
499 2 : IF (eps > eps_warning) THEN
500 1 : IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
501 1 : diag_type_name = "SYGVX"
502 0 : ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
503 0 : diag_type_name = "ELPA"
504 0 : ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
505 0 : diag_type_name = "CUSOLVER"
506 0 : ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
507 0 : diag_type_name = "DLAF"
508 : ELSE
509 0 : CPABORT("Unknown diag_type")
510 : END IF
511 : WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,F0.0,A,ES10.3)") &
512 1 : "The generalized eigenvectors returned by "//TRIM(diag_type_name)//" are not S-orthonormal", &
513 1 : "Matrix element (", i, ", ", j, ") = ", test, &
514 2 : "The deviation from the expected value ", gold, " is", eps
515 1 : IF (eps > eps_abort) THEN
516 0 : CPABORT("ERROR in "//routineN//": Check of generalized matrix diagonalization failed")
517 : ELSE
518 1 : CPWARN("Check of generalized matrix diagonalization failed in routine "//routineN)
519 : END IF
520 : END IF
521 :
522 2 : CALL timestop(handle)
523 :
524 : END SUBROUTINE check_generalized_diag
525 :
526 : ! **************************************************************************************************
527 : !> \brief Issues an error messages and exits (optionally only warns).
528 : !> \param mesg message to be issued
529 : !> \param info error code (optional)
530 : !> \param warn only warn (optional)
531 : ! **************************************************************************************************
532 0 : SUBROUTINE cp_fm_error(mesg, info, warn)
533 : CHARACTER(LEN=*), INTENT(IN) :: mesg
534 : INTEGER, INTENT(IN), OPTIONAL :: info
535 : LOGICAL, INTENT(IN), OPTIONAL :: warn
536 :
537 : CHARACTER(LEN=2*default_string_length) :: message
538 : LOGICAL :: warning
539 :
540 0 : IF (PRESENT(info)) THEN
541 0 : WRITE (message, "(A,A,I0,A)") mesg, " (INFO = ", info, ")"
542 : ELSE
543 0 : WRITE (message, "(A)") mesg
544 : END IF
545 :
546 0 : IF (PRESENT(warn)) THEN
547 0 : warning = warn
548 : ELSE ! abort
549 : warning = .FALSE.
550 : END IF
551 :
552 0 : IF (warning) THEN
553 0 : CPWARN(TRIM(message))
554 : ELSE
555 0 : CPABORT(TRIM(message))
556 : END IF
557 0 : END SUBROUTINE cp_fm_error
558 :
559 : ! **************************************************************************************************
560 : !> \brief Computes all eigenvalues and vectors of a real symmetric matrix
561 : !> significantly faster than syevx, scales also much better.
562 : !> Needs workspace to allocate all the eigenvectors
563 : !> \param matrix ...
564 : !> \param eigenvectors ...
565 : !> \param eigenvalues ...
566 : !> \param info ...
567 : !> \par matrix is supposed to be in upper triangular form, and overwritten by this routine
568 : !> \par info If present returns error code and prevents program stops.
569 : !> Works currently only for scalapack.
570 : !> Other solvers will end the program regardless of PRESENT(info).
571 : ! **************************************************************************************************
572 188202 : SUBROUTINE cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
573 :
574 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
575 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
576 : INTEGER, INTENT(OUT), OPTIONAL :: info
577 :
578 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_syevd'
579 :
580 : INTEGER :: handle, myinfo, n, nmo
581 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eig
582 : #if defined(__parallel)
583 : TYPE(cp_fm_type) :: eigenvectors_new, matrix_new
584 : #else
585 : INTEGER :: liwork, lwork
586 : INTEGER, DIMENSION(:), POINTER :: iwork
587 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: m
588 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
589 : INTEGER, TARGET :: v(1)
590 : REAL(KIND=dp), TARGET :: w(1)
591 : #endif
592 :
593 188202 : CALL timeset(routineN, handle)
594 :
595 188202 : myinfo = 0
596 :
597 188202 : n = matrix%matrix_struct%nrow_global
598 564606 : ALLOCATE (eig(n))
599 :
600 : #if defined(__parallel)
601 : ! Determine if the input matrix needs to be redistributed before diagonalization.
602 : ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
603 : ! The redistributed matrix is stored in matrix_new, which is just a pointer
604 : ! to the original matrix if no redistribution is required
605 188202 : CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new)
606 :
607 : ! Call scalapack on CPUs that hold the new matrix
608 188202 : IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
609 96028 : IF (PRESENT(info)) THEN
610 2780 : CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig, myinfo)
611 : ELSE
612 93248 : CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig)
613 : END IF
614 : END IF
615 : ! Redistribute results and clean up
616 188202 : CALL cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
617 : #else
618 : ! Retrieve the optimal work array sizes first
619 : lwork = -1
620 : liwork = -1
621 : m => matrix%local_data
622 : iwork => v
623 : work => w
624 :
625 : CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
626 :
627 : IF (myinfo /= 0) THEN
628 : CALL cp_fm_error("ERROR in DSYEVD: Work space query failed", myinfo, PRESENT(info))
629 : END IF
630 :
631 : ! Reallocate work arrays and perform diagonalisation
632 : lwork = NINT(work(1))
633 : ALLOCATE (work(lwork))
634 :
635 : liwork = iwork(1)
636 : ALLOCATE (iwork(liwork))
637 :
638 : CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
639 :
640 : IF (myinfo /= 0) THEN
641 : CALL cp_fm_error("ERROR in DSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
642 : END IF
643 :
644 : CALL cp_fm_to_fm(matrix, eigenvectors)
645 :
646 : DEALLOCATE (iwork)
647 : DEALLOCATE (work)
648 : #endif
649 :
650 188202 : IF (PRESENT(info)) info = myinfo
651 :
652 188202 : nmo = SIZE(eigenvalues, 1)
653 188202 : IF (nmo > n) THEN
654 0 : eigenvalues(1:n) = eig(1:n)
655 : ELSE
656 1738683 : eigenvalues(1:nmo) = eig(1:nmo)
657 : END IF
658 :
659 188202 : DEALLOCATE (eig)
660 :
661 188202 : CALL check_diag(matrix, eigenvectors, n)
662 :
663 188202 : CALL timestop(handle)
664 :
665 376404 : END SUBROUTINE cp_fm_syevd
666 :
667 : ! **************************************************************************************************
668 : !> \brief ...
669 : !> \param matrix ...
670 : !> \param eigenvectors ...
671 : !> \param eigenvalues ...
672 : !> \param info ...
673 : ! **************************************************************************************************
674 96028 : SUBROUTINE cp_fm_syevd_base(matrix, eigenvectors, eigenvalues, info)
675 :
676 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
677 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
678 : INTEGER, INTENT(OUT), OPTIONAL :: info
679 :
680 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_syevd_base'
681 :
682 : INTEGER :: handle, myinfo
683 : #if defined(__parallel)
684 : TYPE(cp_blacs_env_type), POINTER :: context
685 : INTEGER :: liwork, lwork, n
686 : INTEGER, DIMENSION(9) :: descm, descv
687 96028 : INTEGER, DIMENSION(:), POINTER :: iwork
688 96028 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
689 96028 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: m, v
690 : REAL(KIND=dp), TARGET :: w(1)
691 : #if defined (__HAS_IEEE_EXCEPTIONS)
692 : LOGICAL, DIMENSION(5) :: halt
693 : #endif
694 : #endif
695 :
696 96028 : CALL timeset(routineN, handle)
697 :
698 96028 : myinfo = 0
699 :
700 : #if defined(__parallel)
701 :
702 96028 : n = matrix%matrix_struct%nrow_global
703 96028 : m => matrix%local_data
704 96028 : context => matrix%matrix_struct%context
705 960280 : descm(:) = matrix%matrix_struct%descriptor(:)
706 :
707 96028 : v => eigenvectors%local_data
708 960280 : descv(:) = eigenvectors%matrix_struct%descriptor(:)
709 :
710 96028 : liwork = 7*n + 8*context%num_pe(2) + 2
711 288084 : ALLOCATE (iwork(liwork))
712 :
713 : ! Work space query
714 96028 : lwork = -1
715 96028 : work => w
716 :
717 : CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
718 96028 : work(1), lwork, iwork(1), liwork, myinfo)
719 :
720 96028 : IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
721 0 : CALL cp_fm_error("ERROR in PDSYEVD: Work space query failed", myinfo, PRESENT(info))
722 : END IF
723 :
724 96028 : lwork = NINT(work(1)) ! can be insufficient due to bug in reference ScaLAPACK
725 : #if !defined(__SCALAPACK_NO_WA)
726 : ! Query workspace for QDORMTR as called by reference ScaLAPACK (PDSYEVD).
727 : CALL pdormtr('L', 'U', 'N', n, n, m(1, 1), 1, 1, descm, m(1, 1), &
728 96028 : v(1, 1), 1, 1, descv, work(1), -1, myinfo)
729 :
730 96028 : IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
731 0 : CALL cp_fm_error("ERROR in PDORMTR: Work space query failed", myinfo, PRESENT(info))
732 : END IF
733 :
734 96028 : IF (lwork < (work(1) + 2*n)) THEN
735 44015 : lwork = NINT(work(1)) + 2*n ! still wrong by 2*N
736 : END IF
737 : #endif
738 288084 : ALLOCATE (work(lwork))
739 :
740 : ! Initial/documented amount of liwork is exceeded (slightly worrisome too).
741 96028 : IF (liwork < iwork(1)) THEN
742 0 : liwork = iwork(1)
743 0 : DEALLOCATE (iwork)
744 0 : ALLOCATE (iwork(liwork))
745 : END IF
746 :
747 : ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
748 : ! Therefore, we disable floating point traps temporarily.
749 : #if defined (__HAS_IEEE_EXCEPTIONS)
750 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
751 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
752 : #endif
753 :
754 : CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
755 96028 : work(1), lwork, iwork(1), liwork, myinfo)
756 :
757 : #if defined (__HAS_IEEE_EXCEPTIONS)
758 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
759 : #endif
760 96028 : IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
761 0 : CALL cp_fm_error("ERROR in PDSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
762 : END IF
763 :
764 96028 : IF (PRESENT(info)) info = myinfo
765 :
766 96028 : DEALLOCATE (work)
767 96028 : DEALLOCATE (iwork)
768 : #else
769 : MARK_USED(matrix)
770 : MARK_USED(eigenvectors)
771 : MARK_USED(eigenvalues)
772 : myinfo = -1
773 : IF (PRESENT(info)) info = myinfo
774 : CALL cp_fm_error("ERROR in "//TRIM(routineN)// &
775 : ": Matrix diagonalization using PDSYEVD requested without ScaLAPACK support")
776 : #endif
777 :
778 96028 : CALL timestop(handle)
779 :
780 96028 : END SUBROUTINE cp_fm_syevd_base
781 :
782 : ! **************************************************************************************************
783 : !> \brief compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack.
784 : !> If eigenvectors are required this routine will replicate a full matrix on each CPU...
785 : !> if more than a handful of vectors are needed, use cp_fm_syevd instead
786 : !> \param matrix ...
787 : !> \param eigenvectors ...
788 : !> \param eigenvalues ...
789 : !> \param neig ...
790 : !> \param work_syevx ...
791 : !> \par matrix is supposed to be in upper triangular form, and overwritten by this routine
792 : !> neig is the number of vectors needed (default all)
793 : !> work_syevx evec calculation only, is the fraction of the working buffer allowed (1.0 use full buffer)
794 : !> reducing this saves time, but might cause the routine to fail
795 : ! **************************************************************************************************
796 40 : SUBROUTINE cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
797 :
798 : ! Diagonalise the symmetric n by n matrix using the LAPACK library.
799 :
800 : TYPE(cp_fm_type), INTENT(IN) :: matrix
801 : TYPE(cp_fm_type), OPTIONAL, INTENT(IN) :: eigenvectors
802 : REAL(KIND=dp), OPTIONAL, INTENT(IN) :: work_syevx
803 : INTEGER, INTENT(IN), OPTIONAL :: neig
804 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
805 :
806 : CHARACTER(LEN=*), PARAMETER :: routineN = "cp_fm_syevx"
807 :
808 : #if defined(__parallel)
809 : REAL(KIND=dp), PARAMETER :: orfac = -1.0_dp
810 : #endif
811 : REAL(KIND=dp), PARAMETER :: vl = 0.0_dp, &
812 : vu = 0.0_dp
813 :
814 : TYPE(cp_blacs_env_type), POINTER :: context
815 : TYPE(cp_logger_type), POINTER :: logger
816 : CHARACTER(LEN=1) :: job_type
817 : REAL(KIND=dp) :: abstol, work_syevx_local
818 : INTEGER :: handle, info, liwork, lwork, &
819 : m, n, nb, npcol, nprow, &
820 : output_unit, neig_local
821 : LOGICAL :: ionode, needs_evecs
822 40 : INTEGER, DIMENSION(:), ALLOCATABLE :: ifail, iwork
823 40 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: w, work
824 40 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, z
825 :
826 : REAL(KIND=dp), EXTERNAL :: dlamch
827 :
828 : #if defined(__parallel)
829 : INTEGER :: nn, np0, npe, nq0, nz
830 : INTEGER, DIMENSION(9) :: desca, descz
831 40 : INTEGER, DIMENSION(:), ALLOCATABLE :: iclustr
832 40 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: gap
833 : INTEGER, EXTERNAL :: iceil, numroc
834 : #else
835 : INTEGER :: nla, nlz
836 : INTEGER, EXTERNAL :: ilaenv
837 : #endif
838 : #if defined (__HAS_IEEE_EXCEPTIONS)
839 : LOGICAL, DIMENSION(5) :: halt
840 : #endif
841 :
842 : ! by default all
843 40 : n = matrix%matrix_struct%nrow_global
844 40 : neig_local = n
845 40 : IF (PRESENT(neig)) neig_local = neig
846 40 : IF (neig_local == 0) RETURN
847 :
848 40 : CALL timeset(routineN, handle)
849 :
850 40 : needs_evecs = PRESENT(eigenvectors)
851 :
852 40 : logger => cp_get_default_logger()
853 40 : ionode = logger%para_env%is_source()
854 40 : n = matrix%matrix_struct%nrow_global
855 :
856 : ! by default allocate all needed space
857 40 : work_syevx_local = 1.0_dp
858 40 : IF (PRESENT(work_syevx)) work_syevx_local = work_syevx
859 :
860 : ! set scalapack job type
861 40 : IF (needs_evecs) THEN
862 40 : job_type = "V"
863 : ELSE
864 0 : job_type = "N"
865 : END IF
866 :
867 : ! target the most accurate calculation of the eigenvalues
868 40 : abstol = 2.0_dp*dlamch("S")
869 :
870 40 : context => matrix%matrix_struct%context
871 40 : nprow = context%num_pe(1)
872 40 : npcol = context%num_pe(2)
873 :
874 120 : ALLOCATE (w(n))
875 560 : eigenvalues(:) = 0.0_dp
876 : #if defined(__parallel)
877 :
878 40 : IF (matrix%matrix_struct%nrow_block /= matrix%matrix_struct%ncol_block) THEN
879 0 : CPABORT("ERROR in "//routineN//": Invalid blocksize (no square blocks) found")
880 : END IF
881 :
882 40 : a => matrix%local_data
883 400 : desca(:) = matrix%matrix_struct%descriptor(:)
884 :
885 40 : IF (needs_evecs) THEN
886 40 : z => eigenvectors%local_data
887 400 : descz(:) = eigenvectors%matrix_struct%descriptor(:)
888 : ELSE
889 : ! z will not be referenced
890 0 : z => matrix%local_data
891 0 : descz = desca
892 : END IF
893 :
894 : ! Get the optimal work storage size
895 :
896 40 : npe = nprow*npcol
897 40 : nb = matrix%matrix_struct%nrow_block
898 40 : nn = MAX(n, nb, 2)
899 40 : np0 = numroc(nn, nb, 0, 0, nprow)
900 40 : nq0 = MAX(numroc(nn, nb, 0, 0, npcol), nb)
901 :
902 40 : IF (needs_evecs) THEN
903 : lwork = 5*n + MAX(5*nn, np0*nq0) + iceil(neig_local, npe)*nn + 2*nb*nb + &
904 40 : INT(work_syevx_local*REAL((neig_local - 1)*n, dp)) !!!! allocates a full matrix on every CPU !!!!!
905 : ELSE
906 0 : lwork = 5*n + MAX(5*nn, nb*(np0 + 1))
907 : END IF
908 40 : liwork = 6*MAX(N, npe + 1, 4)
909 :
910 120 : ALLOCATE (gap(npe))
911 120 : gap = 0.0_dp
912 120 : ALLOCATE (iclustr(2*npe))
913 200 : iclustr = 0
914 120 : ALLOCATE (ifail(n))
915 560 : ifail = 0
916 120 : ALLOCATE (iwork(liwork))
917 120 : ALLOCATE (work(lwork))
918 :
919 : ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
920 : ! Therefore, we disable floating point traps temporarily.
921 : #if defined (__HAS_IEEE_EXCEPTIONS)
922 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
923 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
924 : #endif
925 : CALL pdsyevx(job_type, "I", "U", n, a(1, 1), 1, 1, desca, vl, vu, 1, neig_local, abstol, m, nz, w(1), orfac, &
926 40 : z(1, 1), 1, 1, descz, work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap, info)
927 : #if defined (__HAS_IEEE_EXCEPTIONS)
928 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
929 : #endif
930 :
931 : ! Error handling
932 40 : IF (info /= 0) THEN
933 0 : IF (ionode) THEN
934 0 : output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
935 : WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
936 0 : "info = ", info, &
937 0 : "lwork = ", lwork, &
938 0 : "liwork = ", liwork, &
939 0 : "nz = ", nz
940 0 : IF (info > 0) THEN
941 : WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
942 0 : "ifail = ", ifail
943 : WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
944 0 : "iclustr = ", iclustr
945 : WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,E10.3)))") &
946 0 : "gap = ", gap
947 : END IF
948 : END IF
949 0 : CPABORT("ERROR in PDSYEVX (ScaLAPACK)")
950 : END IF
951 :
952 : ! Release work storage
953 40 : DEALLOCATE (gap)
954 40 : DEALLOCATE (iclustr)
955 :
956 : #else
957 :
958 : a => matrix%local_data
959 : IF (needs_evecs) THEN
960 : z => eigenvectors%local_data
961 : ELSE
962 : ! z will not be referenced
963 : z => matrix%local_data
964 : END IF
965 :
966 : ! Get the optimal work storage size
967 :
968 : nb = MAX(ilaenv(1, "DSYTRD", "U", n, -1, -1, -1), &
969 : ilaenv(1, "DORMTR", "U", n, -1, -1, -1))
970 :
971 : lwork = MAX((nb + 3)*n, 8*n) + n ! sun bug fix
972 : liwork = 5*n
973 :
974 : ALLOCATE (ifail(n))
975 : ifail = 0
976 : ALLOCATE (iwork(liwork))
977 : ALLOCATE (work(lwork))
978 : info = 0
979 : nla = SIZE(a, 1)
980 : nlz = SIZE(z, 1)
981 :
982 : ! LAPACK takes advantage of IEEE754 exceptions for speedup.
983 : ! Therefore, we disable floating point traps temporarily.
984 : #if defined (__HAS_IEEE_EXCEPTIONS)
985 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
986 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
987 : #endif
988 : CALL dsyevx(job_type, "I", "U", n, a(1, 1), nla, vl, vu, 1, neig_local, &
989 : abstol, m, w, z(1, 1), nlz, work(1), lwork, iwork(1), ifail(1), info)
990 : #if defined (__HAS_IEEE_EXCEPTIONS)
991 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
992 : #endif
993 :
994 : ! Error handling
995 : IF (info /= 0) THEN
996 : output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
997 : WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
998 : "info = ", info
999 : IF (info > 0) THEN
1000 : WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
1001 : "ifail = ", ifail
1002 : END IF
1003 : CPABORT("Error in DSYEVX (ScaLAPACK)")
1004 : END IF
1005 :
1006 : #endif
1007 : ! Release work storage
1008 40 : DEALLOCATE (ifail)
1009 40 : DEALLOCATE (iwork)
1010 40 : DEALLOCATE (work)
1011 400 : eigenvalues(1:neig_local) = w(1:neig_local)
1012 40 : DEALLOCATE (w)
1013 :
1014 40 : IF (needs_evecs) CALL check_diag(matrix, eigenvectors, neig_local)
1015 :
1016 40 : CALL timestop(handle)
1017 :
1018 120 : END SUBROUTINE cp_fm_syevx
1019 :
1020 : ! **************************************************************************************************
1021 : !> \brief decomposes a quadratic matrix into its singular value decomposition
1022 : !> \param matrix_a ...
1023 : !> \param matrix_eigvl ...
1024 : !> \param matrix_eigvr_t ...
1025 : !> \param eigval ...
1026 : !> \param info ...
1027 : !> \author Maximilian Graml
1028 : ! **************************************************************************************************
1029 100 : SUBROUTINE cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
1030 :
1031 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1032 : TYPE(cp_fm_type), INTENT(INOUT) :: matrix_eigvl, matrix_eigvr_t
1033 : REAL(KIND=dp), DIMENSION(:), POINTER, &
1034 : INTENT(INOUT) :: eigval
1035 : INTEGER, INTENT(OUT), OPTIONAL :: info
1036 :
1037 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_svd'
1038 :
1039 : INTEGER :: handle, n, m, myinfo, lwork
1040 100 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1041 : TYPE(cp_fm_type) :: matrix_lu
1042 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
1043 : REAL(KIND=dp), TARGET :: w(1)
1044 : #if defined(__parallel)
1045 : INTEGER, DIMENSION(9) :: desca, descu, descvt
1046 : #endif
1047 :
1048 100 : CALL timeset(routineN, handle)
1049 :
1050 : CALL cp_fm_create(matrix=matrix_lu, &
1051 : matrix_struct=matrix_a%matrix_struct, &
1052 100 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
1053 100 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
1054 100 : a => matrix_lu%local_data
1055 100 : m = matrix_lu%matrix_struct%nrow_global
1056 100 : n = matrix_lu%matrix_struct%ncol_global
1057 : ! Assert that incoming matrix is quadratic
1058 100 : CPASSERT(m == n)
1059 :
1060 : ! Prepare for workspace queries
1061 100 : myinfo = 0
1062 100 : lwork = -1
1063 100 : work => w
1064 : #if defined(__parallel)
1065 : ! To do: That might need a redistribution check as in cp_fm_syevd
1066 1000 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1067 1000 : descu(:) = matrix_eigvl%matrix_struct%descriptor(:)
1068 1000 : descvt(:) = matrix_eigvr_t%matrix_struct%descriptor(:)
1069 :
1070 : ! Workspace query
1071 : CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
1072 100 : 1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
1073 :
1074 100 : IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
1075 0 : CALL cp_fm_error("ERROR in PDGESVD: Work space query failed", myinfo, PRESENT(info))
1076 : END IF
1077 :
1078 100 : lwork = NINT(work(1))
1079 300 : ALLOCATE (work(lwork))
1080 :
1081 : CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
1082 100 : 1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
1083 :
1084 100 : IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
1085 0 : CALL cp_fm_error("ERROR in PDGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
1086 : END IF
1087 : #else
1088 : ! Workspace query
1089 : CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
1090 : m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
1091 :
1092 : IF (myinfo /= 0) THEN
1093 : CALL cp_fm_error("ERROR in DGESVD: Work space query failed", myinfo, PRESENT(info))
1094 : END IF
1095 :
1096 : lwork = NINT(work(1))
1097 : ALLOCATE (work(lwork))
1098 :
1099 : CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
1100 : m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
1101 :
1102 : IF (myinfo /= 0) THEN
1103 : CALL cp_fm_error("ERROR in DGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
1104 : END IF
1105 :
1106 : #endif
1107 : ! Release intermediary matrices
1108 100 : DEALLOCATE (work)
1109 100 : CALL cp_fm_release(matrix_lu)
1110 :
1111 100 : IF (PRESENT(info)) info = myinfo
1112 :
1113 100 : CALL timestop(handle)
1114 100 : END SUBROUTINE cp_fm_svd
1115 :
1116 : ! **************************************************************************************************
1117 : !> \brief ...
1118 : !> \param matrix ...
1119 : !> \param work ...
1120 : !> \param exponent ...
1121 : !> \param threshold ...
1122 : !> \param n_dependent ...
1123 : !> \param verbose ...
1124 : !> \param eigvals ...
1125 : ! **************************************************************************************************
1126 2198 : SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
1127 :
1128 : ! Raise the real symmetric n by n matrix to the power given by
1129 : ! the exponent. All eigenvectors with a corresponding eigenvalue lower
1130 : ! than threshold are quenched. result in matrix
1131 :
1132 : ! - Creation (29.03.1999, Matthias Krack)
1133 : ! - Parallelised using BLACS and ScaLAPACK (06.06.2001,MK)
1134 :
1135 : TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1136 : REAL(KIND=dp), INTENT(IN) :: exponent, threshold
1137 : INTEGER, INTENT(OUT) :: n_dependent
1138 : LOGICAL, INTENT(IN), OPTIONAL :: verbose
1139 : REAL(KIND=dp), DIMENSION(2), INTENT(OUT), &
1140 : OPTIONAL :: eigvals
1141 :
1142 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_power'
1143 :
1144 : INTEGER :: handle, icol_global, &
1145 : mypcol, myprow, &
1146 : ncol_global, nrow_global
1147 : LOGICAL :: my_verbose
1148 : REAL(KIND=dp) :: condition_number, f, p
1149 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eigenvalues
1150 2198 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: eigenvectors
1151 : TYPE(cp_blacs_env_type), POINTER :: context
1152 :
1153 : #if defined(__parallel)
1154 : INTEGER :: icol_local, ipcol, iprow, irow_global, irow_local
1155 : #endif
1156 :
1157 2198 : CALL timeset(routineN, handle)
1158 :
1159 2198 : my_verbose = .FALSE.
1160 2198 : IF (PRESENT(verbose)) my_verbose = verbose
1161 :
1162 2198 : context => matrix%matrix_struct%context
1163 2198 : myprow = context%mepos(1)
1164 2198 : mypcol = context%mepos(2)
1165 2198 : n_dependent = 0
1166 2198 : p = 0.5_dp*exponent
1167 :
1168 2198 : nrow_global = matrix%matrix_struct%nrow_global
1169 2198 : ncol_global = matrix%matrix_struct%ncol_global
1170 :
1171 6594 : ALLOCATE (eigenvalues(ncol_global))
1172 64505 : eigenvalues(:) = 0.0_dp
1173 :
1174 : ! Compute the eigenvectors and eigenvalues
1175 :
1176 2198 : CALL choose_eigv_solver(matrix, work, eigenvalues)
1177 :
1178 2198 : IF (PRESENT(eigvals)) THEN
1179 350 : eigvals(1) = eigenvalues(1)
1180 350 : eigvals(2) = eigenvalues(ncol_global)
1181 : END IF
1182 :
1183 : #if defined(__parallel)
1184 2198 : eigenvectors => work%local_data
1185 :
1186 : ! Build matrix**exponent with eigenvector quenching
1187 :
1188 64505 : DO icol_global = 1, ncol_global
1189 :
1190 64505 : IF (eigenvalues(icol_global) < threshold) THEN
1191 :
1192 50 : n_dependent = n_dependent + 1
1193 :
1194 50 : ipcol = work%matrix_struct%g2p_col(icol_global)
1195 :
1196 50 : IF (mypcol == ipcol) THEN
1197 50 : icol_local = work%matrix_struct%g2l_col(icol_global)
1198 5850 : DO irow_global = 1, nrow_global
1199 5800 : iprow = work%matrix_struct%g2p_row(irow_global)
1200 5850 : IF (myprow == iprow) THEN
1201 2900 : irow_local = work%matrix_struct%g2l_row(irow_global)
1202 2900 : eigenvectors(irow_local, icol_local) = 0.0_dp
1203 : END IF
1204 : END DO
1205 : END IF
1206 :
1207 : ELSE
1208 :
1209 62257 : f = eigenvalues(icol_global)**p
1210 :
1211 62257 : ipcol = work%matrix_struct%g2p_col(icol_global)
1212 :
1213 62257 : IF (mypcol == ipcol) THEN
1214 62257 : icol_local = work%matrix_struct%g2l_col(icol_global)
1215 3933232 : DO irow_global = 1, nrow_global
1216 3870975 : iprow = work%matrix_struct%g2p_row(irow_global)
1217 3933232 : IF (myprow == iprow) THEN
1218 1979562 : irow_local = work%matrix_struct%g2l_row(irow_global)
1219 : eigenvectors(irow_local, icol_local) = &
1220 1979562 : f*eigenvectors(irow_local, icol_local)
1221 : END IF
1222 : END DO
1223 : END IF
1224 :
1225 : END IF
1226 :
1227 : END DO
1228 :
1229 : #else
1230 :
1231 : eigenvectors => work%local_data
1232 :
1233 : ! Build matrix**exponent with eigenvector quenching
1234 :
1235 : DO icol_global = 1, ncol_global
1236 :
1237 : IF (eigenvalues(icol_global) < threshold) THEN
1238 :
1239 : n_dependent = n_dependent + 1
1240 : eigenvectors(1:nrow_global, icol_global) = 0.0_dp
1241 :
1242 : ELSE
1243 :
1244 : f = eigenvalues(icol_global)**p
1245 : eigenvectors(1:nrow_global, icol_global) = &
1246 : f*eigenvectors(1:nrow_global, icol_global)
1247 :
1248 : END IF
1249 :
1250 : END DO
1251 :
1252 : #endif
1253 2198 : CALL cp_fm_syrk("U", "N", ncol_global, 1.0_dp, work, 1, 1, 0.0_dp, matrix)
1254 2198 : CALL cp_fm_uplo_to_full(matrix, work)
1255 :
1256 : ! Print some warnings/notes
1257 2198 : IF (matrix%matrix_struct%para_env%is_source() .AND. my_verbose) THEN
1258 0 : condition_number = ABS(eigenvalues(ncol_global)/eigenvalues(1))
1259 : WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,(T2,A,ES15.6))") &
1260 0 : "CP_FM_POWER: smallest eigenvalue:", eigenvalues(1), &
1261 0 : "CP_FM_POWER: largest eigenvalue: ", eigenvalues(ncol_global), &
1262 0 : "CP_FM_POWER: condition number: ", condition_number
1263 0 : IF (eigenvalues(1) <= 0.0_dp) THEN
1264 : WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
1265 0 : "WARNING: matrix has a negative eigenvalue, tighten EPS_DEFAULT"
1266 : END IF
1267 0 : IF (condition_number > 1.0E12_dp) THEN
1268 : WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
1269 0 : "WARNING: high condition number => possibly ill-conditioned matrix"
1270 : END IF
1271 : END IF
1272 :
1273 2198 : DEALLOCATE (eigenvalues)
1274 :
1275 2198 : CALL timestop(handle)
1276 :
1277 2198 : END SUBROUTINE cp_fm_power
1278 :
1279 : ! **************************************************************************************************
1280 : !> \brief ...
1281 : !> \param matrix ...
1282 : !> \param eigenvectors ...
1283 : !> \param eigval ...
1284 : !> \param thresh ...
1285 : !> \param start_sec_block ...
1286 : ! **************************************************************************************************
1287 18 : SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, start_sec_block)
1288 :
1289 : ! Calculates block diagonalization of a full symmetric matrix
1290 : ! It has its origin in cp_fm_syevx. This routine rotates only elements
1291 : ! which are larger than a threshold values "thresh".
1292 : ! start_sec_block is the start of the second block.
1293 : ! IT DOES ONLY ONE SWEEP!
1294 :
1295 : ! - Creation (07.10.2002, Martin Fengler)
1296 : ! - Cosmetics (05.04.06, MK)
1297 :
1298 : TYPE(cp_fm_type), INTENT(IN) :: eigenvectors, matrix
1299 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: eigval
1300 : INTEGER, INTENT(IN) :: start_sec_block
1301 : REAL(KIND=dp), INTENT(IN) :: thresh
1302 :
1303 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_block_jacobi'
1304 :
1305 : INTEGER :: handle
1306 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, ev
1307 :
1308 : REAL(KIND=dp) :: tan_theta, tau, c, s
1309 : INTEGER :: q, p, N
1310 18 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: c_ip
1311 :
1312 : #if defined(__parallel)
1313 : TYPE(cp_blacs_env_type), POINTER :: context
1314 :
1315 : INTEGER :: nprow, npcol, block_dim_row, block_dim_col, info, &
1316 : ev_row_block_size, iam, mynumrows, mype, npe, q_loc
1317 18 : REAL(KIND=dp), DIMENSION(:, :), ALLOCATABLE :: a_loc, ev_loc
1318 : INTEGER, DIMENSION(9) :: desca, descz, &
1319 : desc_a_block, &
1320 : desc_ev_loc
1321 : TYPE(mp_comm_type):: allgrp
1322 : TYPE(cp_blacs_type) :: ictxt_loc
1323 : INTEGER, EXTERNAL :: numroc
1324 : #endif
1325 :
1326 : ! -------------------------------------------------------------------------
1327 :
1328 18 : CALL timeset(routineN, handle)
1329 :
1330 : #if defined(__parallel)
1331 18 : context => matrix%matrix_struct%context
1332 18 : allgrp = matrix%matrix_struct%para_env
1333 :
1334 18 : nprow = context%num_pe(1)
1335 18 : npcol = context%num_pe(2)
1336 :
1337 18 : N = matrix%matrix_struct%nrow_global
1338 :
1339 18 : A => matrix%local_data
1340 180 : desca(:) = matrix%matrix_struct%descriptor(:)
1341 18 : EV => eigenvectors%local_data
1342 180 : descz(:) = eigenvectors%matrix_struct%descriptor(:)
1343 :
1344 : ! Copy the block to be rotated to the master process firstly and broadcast to all processes
1345 : ! start_sec_block defines where the second block starts!
1346 : ! Block will be processed together with the OO block
1347 :
1348 18 : block_dim_row = start_sec_block - 1
1349 18 : block_dim_col = N - block_dim_row
1350 72 : ALLOCATE (A_loc(block_dim_row, block_dim_col))
1351 :
1352 18 : mype = matrix%matrix_struct%para_env%mepos
1353 18 : npe = matrix%matrix_struct%para_env%num_pe
1354 : ! Get a new context
1355 18 : CALL ictxt_loc%gridinit(matrix%matrix_struct%para_env, 'Row-major', nprow*npcol, 1)
1356 :
1357 : CALL descinit(desc_a_block, block_dim_row, block_dim_col, block_dim_row, &
1358 18 : block_dim_col, 0, 0, ictxt_loc%get_handle(), block_dim_row, info)
1359 :
1360 : CALL pdgemr2d(block_dim_row, block_dim_col, A, 1, start_sec_block, desca, &
1361 18 : A_loc, 1, 1, desc_a_block, context%get_handle())
1362 : ! Only the master (root) process received data yet
1363 18 : CALL allgrp%bcast(A_loc, 0)
1364 :
1365 : ! Since each process owns now the upper block, the eigenvectors can be re-sorted in such a way that
1366 : ! each process has a NN*1 grid, i.e. the process owns a bunch of rows which can be modified locally
1367 :
1368 : ! Initialize distribution of the eigenvectors
1369 18 : iam = mype
1370 18 : ev_row_block_size = n/(nprow*npcol)
1371 18 : mynumrows = NUMROC(N, ev_row_block_size, iam, 0, nprow*npcol)
1372 :
1373 108 : ALLOCATE (EV_loc(mynumrows, N), c_ip(mynumrows))
1374 :
1375 : CALL descinit(desc_ev_loc, N, N, ev_row_block_size, N, 0, 0, ictxt_loc%get_handle(), &
1376 18 : mynumrows, info)
1377 :
1378 18 : CALL pdgemr2d(N, N, EV, 1, 1, descz, EV_loc, 1, 1, desc_ev_loc, context%get_handle())
1379 :
1380 : ! Start block diagonalization of matrix
1381 :
1382 18 : q_loc = 0
1383 :
1384 1170 : DO q = start_sec_block, N
1385 1152 : q_loc = q_loc + 1
1386 148626 : DO p = 1, (start_sec_block - 1)
1387 :
1388 148608 : IF (ABS(A_loc(p, q_loc)) > thresh) THEN
1389 :
1390 117566 : tau = (eigval(q) - eigval(p))/(2.0_dp*A_loc(p, q_loc))
1391 :
1392 117566 : tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
1393 :
1394 : ! Cos(theta)
1395 117566 : c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
1396 117566 : s = tan_theta*c
1397 :
1398 : ! Calculate eigenvectors: Q*J
1399 : ! c_ip = c*EV_loc(:,p) - s*EV_loc(:,q)
1400 : ! c_iq = s*EV_loc(:,p) + c*EV_loc(:,q)
1401 : ! EV(:,p) = c_ip
1402 : ! EV(:,q) = c_iq
1403 117566 : CALL dcopy(mynumrows, EV_loc(1, p), 1, c_ip(1), 1)
1404 117566 : CALL dscal(mynumrows, c, EV_loc(1, p), 1)
1405 117566 : CALL daxpy(mynumrows, -s, EV_loc(1, q), 1, EV_loc(1, p), 1)
1406 117566 : CALL dscal(mynumrows, c, EV_loc(1, q), 1)
1407 117566 : CALL daxpy(mynumrows, s, c_ip(1), 1, EV_loc(1, q), 1)
1408 :
1409 : END IF
1410 :
1411 : END DO
1412 : END DO
1413 :
1414 : ! Copy eigenvectors back to the original distribution
1415 18 : CALL pdgemr2d(N, N, EV_loc, 1, 1, desc_ev_loc, EV, 1, 1, descz, context%get_handle())
1416 :
1417 : ! Release work storage
1418 18 : DEALLOCATE (A_loc, EV_loc, c_ip)
1419 :
1420 18 : CALL ictxt_loc%gridexit()
1421 :
1422 : #else
1423 :
1424 : N = matrix%matrix_struct%nrow_global
1425 :
1426 : ALLOCATE (c_ip(N)) ! Local eigenvalue vector
1427 :
1428 : A => matrix%local_data ! Contains the Matrix to be worked on
1429 : EV => eigenvectors%local_data ! Contains the eigenvectors up to blocksize, rest is garbage
1430 :
1431 : ! Start matrix diagonalization
1432 :
1433 : tan_theta = 0.0_dp
1434 : tau = 0.0_dp
1435 :
1436 : DO q = start_sec_block, N
1437 : DO p = 1, (start_sec_block - 1)
1438 :
1439 : IF (ABS(A(p, q)) > thresh) THEN
1440 :
1441 : tau = (eigval(q) - eigval(p))/(2.0_dp*A(p, q))
1442 :
1443 : tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
1444 :
1445 : ! Cos(theta)
1446 : c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
1447 : s = tan_theta*c
1448 :
1449 : ! Calculate eigenvectors: Q*J
1450 : ! c_ip = c*EV(:,p) - s*EV(:,q)
1451 : ! c_iq = s*EV(:,p) + c*EV(:,q)
1452 : ! EV(:,p) = c_ip
1453 : ! EV(:,q) = c_iq
1454 : CALL dcopy(N, EV(1, p), 1, c_ip(1), 1)
1455 : CALL dscal(N, c, EV(1, p), 1)
1456 : CALL daxpy(N, -s, EV(1, q), 1, EV(1, p), 1)
1457 : CALL dscal(N, c, EV(1, q), 1)
1458 : CALL daxpy(N, s, c_ip(1), 1, EV(1, q), 1)
1459 :
1460 : END IF
1461 :
1462 : END DO
1463 : END DO
1464 :
1465 : ! Release work storage
1466 :
1467 : DEALLOCATE (c_ip)
1468 :
1469 : #endif
1470 :
1471 18 : CALL timestop(handle)
1472 :
1473 90 : END SUBROUTINE cp_fm_block_jacobi
1474 :
1475 : ! **************************************************************************************************
1476 : !> \brief General Eigenvalue Problem AX = BXE.
1477 : !> Use cuSOLVERMp directly when requested and large enough; otherwise
1478 : !> reduce the problem through a Cholesky decomposition of B.
1479 : !> \param amatrix ...
1480 : !> \param bmatrix ...
1481 : !> \param eigenvectors ...
1482 : !> \param eigenvalues ...
1483 : !> \param work ...
1484 : ! **************************************************************************************************
1485 818 : SUBROUTINE cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
1486 :
1487 : TYPE(cp_fm_type), INTENT(IN) :: amatrix, bmatrix, eigenvectors
1488 : REAL(KIND=dp), DIMENSION(:) :: eigenvalues
1489 : TYPE(cp_fm_type), INTENT(IN) :: work
1490 :
1491 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geeig'
1492 :
1493 : INTEGER :: handle, nao, nmo
1494 : LOGICAL :: check_eigenvectors
1495 : TYPE(cp_fm_type) :: overlap_check, scratch_check
1496 :
1497 818 : CALL timeset(routineN, handle)
1498 :
1499 818 : CALL cp_fm_get_info(amatrix, nrow_global=nao)
1500 818 : nmo = SIZE(eigenvalues)
1501 818 : check_eigenvectors = diag_check_requested()
1502 :
1503 818 : IF (diag_type == FM_DIAG_TYPE_CUSOLVER .AND. direct_generalized_diagonalization .AND. &
1504 : nao >= cusolver_n_min) THEN
1505 : ! Use cuSolverMP generalized eigenvalue solver without a CP2K-side
1506 : ! Cholesky reduction.
1507 : ! Use work as intermediate buffer since eigenvectors may be smaller (nao x nmo)
1508 0 : IF (check_eigenvectors) THEN
1509 0 : CALL cp_fm_create(overlap_check, bmatrix%matrix_struct)
1510 0 : CALL cp_fm_create(scratch_check, bmatrix%matrix_struct)
1511 0 : CALL cp_fm_to_fm(bmatrix, overlap_check)
1512 : END IF
1513 0 : CALL cp_fm_general_cusolver(amatrix, bmatrix, work, eigenvalues)
1514 0 : IF (check_eigenvectors) THEN
1515 0 : CALL check_generalized_diag(overlap_check, work, scratch_check, nmo)
1516 0 : CALL cp_fm_release(scratch_check)
1517 0 : CALL cp_fm_release(overlap_check)
1518 : END IF
1519 0 : CALL cp_fm_to_fm(work, eigenvectors, nmo)
1520 : #if defined(__parallel)
1521 818 : ELSE IF (diag_type == FM_DIAG_TYPE_SCALAPACK .AND. direct_generalized_diagonalization) THEN
1522 : ! Use ScaLAPACK generalized eigenvalue solver without a CP2K-side
1523 : ! Cholesky reduction.
1524 2 : IF (check_eigenvectors) THEN
1525 2 : CALL cp_fm_create(overlap_check, bmatrix%matrix_struct)
1526 2 : CALL cp_fm_create(scratch_check, bmatrix%matrix_struct)
1527 2 : CALL cp_fm_to_fm(bmatrix, overlap_check)
1528 : END IF
1529 2 : CALL cp_fm_geeig_scalapack(amatrix, bmatrix, work, eigenvalues)
1530 2 : IF (check_eigenvectors) THEN
1531 2 : CALL check_generalized_diag(overlap_check, work, scratch_check, nmo)
1532 2 : CALL cp_fm_release(scratch_check)
1533 2 : CALL cp_fm_release(overlap_check)
1534 : END IF
1535 2 : CALL cp_fm_to_fm(work, eigenvectors, nmo)
1536 : #endif
1537 : #if defined(__DLAF)
1538 : ELSE IF (diag_type == FM_DIAG_TYPE_DLAF .AND. direct_generalized_diagonalization .AND. &
1539 : nao >= dlaf_neigvec_min) THEN
1540 : ! Use DLA-Future generalized eigenvalue solver for large matrices
1541 : IF (check_eigenvectors) THEN
1542 : CALL cp_fm_create(overlap_check, bmatrix%matrix_struct)
1543 : CALL cp_fm_create(scratch_check, bmatrix%matrix_struct)
1544 : CALL cp_fm_to_fm(bmatrix, overlap_check)
1545 : END IF
1546 : CALL cp_fm_diag_gen_dlaf(amatrix, bmatrix, work, eigenvalues)
1547 : IF (check_eigenvectors) THEN
1548 : CALL check_generalized_diag(overlap_check, work, scratch_check, nmo)
1549 : CALL cp_fm_release(scratch_check)
1550 : CALL cp_fm_release(overlap_check)
1551 : END IF
1552 : CALL cp_fm_to_fm(work, eigenvectors, nmo)
1553 : #endif
1554 : ELSE
1555 : ! Cholesky decompose S=U(T)U
1556 816 : CALL cp_fm_cholesky_decompose(bmatrix)
1557 : ! Invert to get U^(-1)
1558 816 : CALL cp_fm_triangular_invert(bmatrix)
1559 : ! Reduce to get U^(-T) * H * U^(-1)
1560 816 : CALL cp_fm_triangular_multiply(bmatrix, amatrix, side="R")
1561 816 : CALL cp_fm_triangular_multiply(bmatrix, amatrix, transpose_tr=.TRUE.)
1562 : ! Diagonalize
1563 : CALL choose_eigv_solver(matrix=amatrix, eigenvectors=work, &
1564 816 : eigenvalues=eigenvalues)
1565 : ! Restore vectors C = U^(-1) * C*
1566 816 : CALL cp_fm_triangular_multiply(bmatrix, work)
1567 816 : CALL cp_fm_to_fm(work, eigenvectors, nmo)
1568 : END IF
1569 :
1570 818 : CALL timestop(handle)
1571 :
1572 818 : END SUBROUTINE cp_fm_geeig
1573 :
1574 : ! **************************************************************************************************
1575 : !> \brief General Eigenvalue Problem AX = BXE using ScaLAPACK PDSYGVX.
1576 : !> \param amatrix ...
1577 : !> \param bmatrix ...
1578 : !> \param eigenvectors ...
1579 : !> \param eigenvalues ...
1580 : ! **************************************************************************************************
1581 2 : SUBROUTINE cp_fm_geeig_scalapack(amatrix, bmatrix, eigenvectors, eigenvalues)
1582 :
1583 : TYPE(cp_fm_type), INTENT(IN) :: amatrix, bmatrix, eigenvectors
1584 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
1585 :
1586 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geeig_scalapack'
1587 :
1588 : #if defined(__parallel)
1589 : REAL(KIND=dp), PARAMETER :: orfac = -1.0_dp, &
1590 : vl = 0.0_dp, &
1591 : vu = 0.0_dp
1592 :
1593 : INTEGER :: handle, info, liwork, lwork, m, n, nb, &
1594 : neig, npcol, nprow, nz
1595 : INTEGER, DIMENSION(9) :: desca, descb, descz
1596 2 : INTEGER, DIMENSION(:), ALLOCATABLE :: iclustr, ifail, iwork
1597 : REAL(KIND=dp) :: abstol
1598 2 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: gap, w, work
1599 2 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, z
1600 :
1601 : INTEGER :: mq0, nn, np0, npe
1602 : INTEGER, EXTERNAL :: iceil, numroc
1603 : REAL(KIND=dp), EXTERNAL :: dlamch
1604 : #if defined (__HAS_IEEE_EXCEPTIONS)
1605 : LOGICAL, DIMENSION(5) :: halt
1606 : #endif
1607 : #else
1608 : INTEGER :: handle
1609 : #endif
1610 :
1611 2 : CALL timeset(routineN, handle)
1612 :
1613 : #if defined(__parallel)
1614 2 : n = amatrix%matrix_struct%nrow_global
1615 2 : neig = MIN(SIZE(eigenvalues), n)
1616 :
1617 2 : IF (neig == 0) THEN
1618 0 : CALL timestop(handle)
1619 0 : RETURN
1620 : END IF
1621 :
1622 2 : IF (amatrix%matrix_struct%nrow_block /= amatrix%matrix_struct%ncol_block) THEN
1623 0 : CPABORT("ERROR in "//routineN//": Invalid blocksize (no square blocks) found")
1624 : END IF
1625 :
1626 2 : a => amatrix%local_data
1627 2 : b => bmatrix%local_data
1628 2 : z => eigenvectors%local_data
1629 20 : desca(:) = amatrix%matrix_struct%descriptor(:)
1630 20 : descb(:) = bmatrix%matrix_struct%descriptor(:)
1631 20 : descz(:) = eigenvectors%matrix_struct%descriptor(:)
1632 :
1633 2 : nprow = amatrix%matrix_struct%context%num_pe(1)
1634 2 : npcol = amatrix%matrix_struct%context%num_pe(2)
1635 2 : npe = nprow*npcol
1636 2 : nb = amatrix%matrix_struct%nrow_block
1637 2 : nn = MAX(n, nb, 2)
1638 2 : np0 = numroc(nn, nb, 0, 0, nprow)
1639 2 : mq0 = MAX(numroc(nn, nb, 0, 0, npcol), nb)
1640 :
1641 : lwork = 5*n + MAX(5*nn, np0*mq0 + 2*nb*nb) + iceil(neig, npe)*nn + &
1642 2 : MAX(0, neig - 1)*n
1643 2 : liwork = 6*MAX(n, npe + 1, 4)
1644 :
1645 6 : ALLOCATE (gap(npe))
1646 6 : gap = 0.0_dp
1647 6 : ALLOCATE (iclustr(2*npe))
1648 10 : iclustr = 0
1649 6 : ALLOCATE (ifail(n))
1650 354 : ifail = 0
1651 6 : ALLOCATE (iwork(liwork))
1652 6 : ALLOCATE (w(n))
1653 6 : ALLOCATE (work(lwork))
1654 :
1655 2 : abstol = 2.0_dp*dlamch("S")
1656 :
1657 : #if defined (__HAS_IEEE_EXCEPTIONS)
1658 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
1659 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
1660 : #endif
1661 : CALL pdsygvx(1, "V", "I", "U", n, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, &
1662 : vl, vu, 1, neig, abstol, m, nz, w(1), orfac, z(1, 1), 1, 1, descz, &
1663 2 : work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap(1), info)
1664 : #if defined (__HAS_IEEE_EXCEPTIONS)
1665 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
1666 : #endif
1667 :
1668 2 : IF (info /= 0 .OR. m < neig .OR. nz < neig) THEN
1669 0 : CPABORT("ERROR in PDSYGVX (ScaLAPACK), info="//TRIM(cp_to_string(info)))
1670 : END IF
1671 :
1672 34 : eigenvalues(:) = 0.0_dp
1673 34 : eigenvalues(1:neig) = w(1:neig)
1674 :
1675 2 : DEALLOCATE (gap, iclustr, ifail, iwork, w, work)
1676 : #else
1677 : MARK_USED(amatrix)
1678 : MARK_USED(bmatrix)
1679 : MARK_USED(eigenvectors)
1680 : MARK_USED(eigenvalues)
1681 : CPABORT("ERROR in "//routineN//": PDSYGVX requested without ScaLAPACK support")
1682 : #endif
1683 :
1684 2 : CALL timestop(handle)
1685 :
1686 2 : END SUBROUTINE cp_fm_geeig_scalapack
1687 :
1688 : ! **************************************************************************************************
1689 : !> \brief General Eigenvalue Problem AX = BXE
1690 : !> Use canonical diagonalization : U*s**(-1/2)
1691 : !> \param amatrix ...
1692 : !> \param bmatrix ...
1693 : !> \param eigenvectors ...
1694 : !> \param eigenvalues ...
1695 : !> \param work ...
1696 : !> \param epseig ...
1697 : ! **************************************************************************************************
1698 68 : SUBROUTINE cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
1699 :
1700 : TYPE(cp_fm_type), INTENT(IN) :: amatrix, bmatrix, eigenvectors
1701 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
1702 : TYPE(cp_fm_type), INTENT(IN) :: work
1703 : REAL(KIND=dp), INTENT(IN) :: epseig
1704 :
1705 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geeig_canon'
1706 :
1707 : INTEGER :: handle, i, icol, irow, nao, nc, ncol, &
1708 : nmo, nx
1709 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: evals
1710 :
1711 68 : CALL timeset(routineN, handle)
1712 :
1713 : ! Test sizees
1714 68 : CALL cp_fm_get_info(amatrix, nrow_global=nao)
1715 68 : nmo = SIZE(eigenvalues)
1716 204 : ALLOCATE (evals(nao))
1717 :
1718 : ! Diagonalize -S matrix, this way the NULL space is at the end of the spectrum
1719 68 : CALL cp_fm_scale(-1.0_dp, bmatrix)
1720 68 : CALL choose_eigv_solver(matrix=bmatrix, eigenvectors=work, eigenvalues=evals)
1721 4726 : evals(:) = -evals(:)
1722 68 : nc = nao
1723 4434 : DO i = 1, nao
1724 4434 : IF (evals(i) < epseig) THEN
1725 40 : nc = i - 1
1726 40 : EXIT
1727 : END IF
1728 : END DO
1729 68 : CPASSERT(nc /= 0)
1730 :
1731 68 : IF (nc /= nao) THEN
1732 40 : IF (nc < nmo) THEN
1733 : ! Copy NULL space definition to last vectors of eigenvectors (if needed)
1734 0 : ncol = nmo - nc
1735 0 : CALL cp_fm_to_fm(work, eigenvectors, ncol, nc + 1, nc + 1)
1736 : END IF
1737 : ! Set NULL space in eigenvector matrix of S to zero
1738 332 : DO icol = nc + 1, nao
1739 36172 : DO irow = 1, nao
1740 36132 : CALL cp_fm_set_element(work, irow, icol, 0.0_dp)
1741 : END DO
1742 : END DO
1743 : ! Set small eigenvalues to a dummy save value
1744 332 : evals(nc + 1:nao) = 1.0_dp
1745 : END IF
1746 : ! Calculate U*s**(-1/2)
1747 4726 : evals(:) = 1.0_dp/SQRT(evals(:))
1748 68 : CALL cp_fm_column_scale(work, evals)
1749 : ! Reduce to get U^(-T) * H * U^(-1)
1750 68 : CALL cp_fm_gemm("T", "N", nao, nao, nao, 1.0_dp, work, amatrix, 0.0_dp, bmatrix)
1751 68 : CALL cp_fm_gemm("N", "N", nao, nao, nao, 1.0_dp, bmatrix, work, 0.0_dp, amatrix)
1752 68 : IF (nc /= nao) THEN
1753 : ! set diagonal values to save large value
1754 332 : DO icol = nc + 1, nao
1755 332 : CALL cp_fm_set_element(amatrix, icol, icol, 10000.0_dp)
1756 : END DO
1757 : END IF
1758 : ! Diagonalize
1759 68 : CALL choose_eigv_solver(matrix=amatrix, eigenvectors=bmatrix, eigenvalues=eigenvalues)
1760 68 : nx = MIN(nc, nmo)
1761 : ! Restore vectors C = U^(-1) * C*
1762 68 : CALL cp_fm_gemm("N", "N", nao, nx, nc, 1.0_dp, work, bmatrix, 0.0_dp, eigenvectors)
1763 :
1764 68 : DEALLOCATE (evals)
1765 :
1766 68 : CALL timestop(handle)
1767 :
1768 68 : END SUBROUTINE cp_fm_geeig_canon
1769 :
1770 : END MODULE cp_fm_diag
|