Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! Copyright (c) 2016 Anton Shterenlikht, The University of Bristol, UK
9 : !
10 : ! Redistribution and use in source and binary forms, with or without
11 : ! modification, are permitted provided that the following conditions are
12 : ! met:
13 : !
14 : ! 1. Redistributions of source code must retain the above copyright
15 : ! notice, this list of conditions and the following disclaimer.
16 : !
17 : ! 2. Redistributions in binary form must reproduce the above copyright
18 : ! notice, this list of conditions and the following disclaimer in the
19 : ! documentation and/or other materials provided with the distribution.
20 : !
21 : ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22 : ! IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 : ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 : ! PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 : ! HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 : ! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 : ! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 : ! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 : ! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 : ! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 : ! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 : !
33 : ! Module with error function and related functions.
34 : ! Two algorithms are implemented: Poppe and Wijers (faddeyeva_fast, erfz_fast)
35 : ! and Zaghloul and Ali (faddeyeva_accurate, erfz_accurate). The second algorithm is
36 : ! supposed to be more accurate for some values. However, the first
37 : ! algorithm can be over an order of magnitude faster.
38 : !
39 : ! This code was written before the author became aware of
40 : ! M. R. Zaghloul, Remark on "Algorithm 916: Computing the
41 : ! Faddeyeva and Voight functions": efficiency improvements and
42 : ! Fortran translation, ACM Trans. Math. Software 42, Article 26, 2016.
43 :
44 : ! **************************************************************************************************
45 : !> \brief Module to compute the error function of a complex argument
46 : !> \par History
47 : !> 08.2025 Adapted to use CP2K intrinsics and constants
48 : !> \author Stefano Battaglia
49 : ! **************************************************************************************************
50 : MODULE erf_complex
51 :
52 : USE kinds, ONLY: dp
53 : USE mathconstants, ONLY: pi
54 :
55 : IMPLICIT NONE
56 :
57 : REAL(kind=dp), PARAMETER :: zero = 0.0_dp, one = 1.0_dp, &
58 : two = 2.0_dp, half = 0.5_dp, rmin = TINY(one), &
59 : eps0 = EPSILON(one), sqrt_log_rmin = SQRT(-LOG(rmin)), &
60 : pi2 = pi*pi, one_sqrt_pi = one/SQRT(pi)
61 : COMPLEX(kind=dp), PARAMETER :: &
62 : cmplxj = CMPLX(zero, one, kind=dp), &
63 : cmplx0 = CMPLX(zero, zero, kind=dp)
64 :
65 : PRIVATE
66 : PUBLIC :: faddeyeva_fast, faddeyeva_accurate, erfz_fast, erfz_accurate
67 :
68 : CONTAINS
69 :
70 : ! **************************************************************************************************
71 : !> \brief Computes the Faddeyeva function w(z) = exp(-z**2) * erfc(-i*z)
72 : !> \param z complex argument
73 : !> \param err desired accuracy (positive)
74 : !> \return Faddeyeva function w(z)
75 : ! **************************************************************************************************
76 0 : elemental COMPLEX(kind=dp) FUNCTION faddeyeva_accurate(z, err)
77 :
78 : ! This is the Faddeyeva or the plasma dispersion function,
79 : ! w(z) = exp(-z**2) * erfc(-i*z). erfc(z) is the complex complementary
80 : ! error function of z. z is a complex number.
81 : !
82 : ! Adapted from the Matlab code implementing TOMS
83 : ! Algorithm 916: http://www.netlib.org/toms/
84 : !
85 : ! file: 916.zip
86 : ! ref: TOMS 38,2 (Dec 2011) Article: 15
87 : ! for: Computing the Faddeyeva and Voigt Functions
88 : ! by: Mofreh R. Zaghloul and Ahmed N. Ali
89 : !
90 : ! Most of the code is calculation of equations (13)-(19) of the
91 : ! above paper.
92 : !
93 : ! Inputs:
94 : ! z - the argument of the function
95 : ! err - The desired accuracy (positive). err must be (err .le. 1.0e-4)
96 : ! and (err .ge. 0.06447*epsilon). For efficiency,
97 : ! no checks are made!
98 : ! The lowest accuracy and the fastest calculation are obtained
99 : ! with err .eq. 1.0e-4. For higher accuracy use smaller err.
100 :
101 : COMPLEX(kind=dp), INTENT(in) :: z
102 : REAL(kind=dp), INTENT(in) :: err
103 :
104 : INTEGER :: n, n3, n3_3
105 : REAL(kind=dp) :: a, a_pi, a_sqr, aux13, cos_2yx, del2_tmp, del3, del3_3_tmp, del3_tmp, del5, &
106 : delta3, delta5, den1, erfcsy, exp1, exp2, exp3, exp3_3_den, exp3_den, exp_del1, &
107 : exp_x_sqr, four_a_sqr, half_a, l_old, myerr, sigma1, sigma2, sigma3, sigma4, sigma4_5, &
108 : sigma5, sin_2yx, two_a, two_a_pi, two_a_sqr, two_a_x, two_exp_x_sqr_ysqr, two_yx, v_old, &
109 : x, x_sqr, xsign, y, y_sqr, ysign
110 :
111 0 : x = REAL(z)
112 0 : y = AIMAG(z)
113 :
114 : ! For purely imaginary z, use intrinsic scaled complement of
115 : ! the error function, erfc_scaled (F2008 and beyond).
116 : ! Return immediately.
117 0 : IF (ABS(x) == zero) THEN
118 0 : faddeyeva_accurate = erfc_scaled(y)
119 0 : RETURN
120 : END IF
121 :
122 0 : myerr = MAX(err, eps0)
123 0 : a = SQRT(-pi2/LOG(err/2.0_dp))
124 0 : half_a = half*a
125 0 : a_sqr = a**2
126 0 : two_a = 2*a
127 0 : two_a_sqr = 2*a_sqr
128 0 : four_a_sqr = 4*a_sqr
129 0 : a_pi = a/pi
130 0 : two_a_pi = 2*a_pi
131 0 : erfcsy = erfc_scaled(ABS(y))
132 0 : xsign = SIGN(one, x)
133 0 : ysign = SIGN(one, y)
134 0 : x = ABS(x)
135 0 : y = MAX(rmin, ABS(y))
136 0 : x_sqr = x**2
137 0 : y_sqr = y**2
138 0 : two_yx = 2*y*x
139 0 : two_a_x = two_a*x
140 0 : exp_x_sqr = EXP(-x_sqr)
141 0 : cos_2yx = COS(two_yx)
142 0 : sin_2yx = SIN(two_yx)
143 : v_old = exp_x_sqr* &
144 0 : (erfcsy*cos_2yx + two_a_pi*SIN(two_yx/2)**2/y)
145 0 : l_old = -erfcsy + a_pi/y
146 0 : sigma3 = rmin
147 0 : sigma5 = rmin
148 0 : sigma4_5 = zero
149 0 : delta3 = one
150 0 : delta5 = one
151 0 : n = 0
152 0 : n3 = CEILING(x/a)
153 0 : n3_3 = n3 - 1
154 :
155 0 : outer: IF ((sqrt_log_rmin - x) > 0) THEN
156 0 : sigma1 = rmin
157 0 : sigma2 = rmin
158 0 : sigma4 = rmin
159 0 : exp1 = EXP(-two_a_x)
160 0 : exp2 = EXP(four_a_sqr*n3 - 2*two_a_x - two_a_sqr)
161 0 : exp3 = EXP(-(two_a_sqr*n3 - two_a_x - two_a_sqr))
162 0 : del2_tmp = one
163 : del3_tmp = EXP(-(a_sqr*n3**2 - two_a_x*n3 &
164 0 : - two_a_sqr*n3 + x_sqr + two_a_x + a_sqr))
165 0 : del3_3_tmp = EXP(a_sqr - (two_a_sqr*n3 - two_a_x))
166 :
167 0 : loop1: DO
168 0 : IF (delta3 < myerr .AND. delta5 < myerr .AND. &
169 : n > 50) EXIT loop1
170 0 : n = n + 1
171 0 : den1 = a_sqr*n**2 + y_sqr
172 0 : exp_del1 = EXP(-(a_sqr*n**2))/den1
173 0 : del2_tmp = del2_tmp*exp1
174 :
175 0 : minor: IF (n3_3 >= 1) THEN
176 0 : del3_tmp = del3_tmp*exp3
177 : exp3_den = del3_tmp*exp_del1* &
178 0 : (den1/(a_sqr*n3**2 + y_sqr))
179 0 : del3_3_tmp = del3_3_tmp*exp2
180 : exp3_3_den = exp3_den*del3_3_tmp* &
181 : ((a_sqr*n3**2 + y_sqr)/ &
182 0 : (a_sqr*n3_3**2 + y_sqr))
183 0 : del5 = n3_3*exp3_3_den + n3*exp3_den
184 0 : del3 = exp3_3_den + exp3_den
185 : ELSE
186 0 : del3_tmp = del3_tmp*exp3
187 : del3 = del3_tmp*exp_del1* &
188 0 : (den1/(a_sqr*n3**2 + y_sqr))
189 0 : del5 = n3*del3
190 : END IF minor
191 :
192 0 : delta3 = del3/sigma3
193 0 : delta5 = del5/sigma5
194 0 : sigma1 = sigma1 + exp_del1
195 0 : sigma2 = sigma2 + del2_tmp*exp_x_sqr*exp_del1
196 0 : sigma3 = sigma3 + del3
197 0 : sigma4 = sigma4 + n*del2_tmp*exp_x_sqr*exp_del1
198 0 : sigma5 = sigma5 + del5
199 :
200 0 : IF (x >= 5.0e-4_dp) THEN
201 0 : sigma4_5 = -sigma4 + sigma5
202 : ELSE
203 : sigma4_5 = sigma4_5 + 2*n**2*two_a_x*exp_x_sqr &
204 : *exp_del1*(one + 1.666666666666667e-1_dp &
205 : *(two_a_x*n)**2 + 8.333333333333333e-3_dp &
206 0 : *(two_a_x*n)**4)
207 : END IF
208 :
209 0 : n3 = n3 + 1
210 0 : n3_3 = n3_3 - 1
211 :
212 : END DO loop1
213 :
214 : ! Second line of Eqn (13)
215 : aux13 = y*two_a_pi* &
216 0 : (-cos_2yx*exp_x_sqr*sigma1 + half*(sigma2 + sigma3))
217 0 : mumu: IF (y <= 5.0_dp .AND. two_yx > rmin) THEN
218 : faddeyeva_accurate = v_old + aux13 + cmplxj*xsign &
219 : *(sin_2yx*exp_x_sqr*(l_old + two_a_pi*y*sigma1) &
220 0 : + two_a_pi*half_a*sigma4_5)
221 0 : ELSE IF (y <= 5.0_dp .AND. two_yx <= rmin) THEN
222 : faddeyeva_accurate = v_old + aux13 + cmplxj*xsign &
223 : *(two*y*exp_x_sqr*(x*l_old + x*two_a_pi*y &
224 0 : *sigma1) + two_a_pi*half_a*sigma4_5)
225 : ELSE
226 : faddeyeva_accurate = v_old + aux13 + cmplxj*xsign &
227 : *(sin_2yx*exp_x_sqr*MIN(zero, ABS(l_old &
228 : + (two_a_pi*y*sigma1))) + two_a_pi*half_a &
229 0 : *sigma4_5)
230 : END IF mumu
231 :
232 0 : ELSE IF (x >= sqrt_log_rmin .AND. x < 1.0e15_dp) THEN
233 :
234 0 : exp2 = EXP(four_a_sqr*n3 - 2*two_a_x - two_a_sqr)
235 0 : del3_3_tmp = EXP(a_sqr + two_a_x - two_a_sqr*n3)
236 :
237 0 : loop2: DO
238 0 : IF (delta3 < myerr .AND. delta5 < myerr .AND. &
239 : n > 50) EXIT loop2
240 0 : n = n + 1
241 0 : IF (n3_3 >= 1) THEN
242 : exp3_den = EXP(-(a*n3 - x)*(a*n3 - x)) &
243 0 : /(a_sqr*n3**2 + y_sqr)
244 0 : del3_3_tmp = del3_3_tmp*exp2
245 : exp3_3_den = exp3_den*del3_3_tmp*((a_sqr*n3**2 + y_sqr) &
246 0 : /(a_sqr*n3_3**2 + y_sqr))
247 0 : del5 = n3_3*exp3_3_den + n3*exp3_den
248 0 : del3 = exp3_3_den + exp3_den
249 : ELSE
250 0 : del3 = EXP(-(a*n3 - x)**2)/(a_sqr*n3**2 + y_sqr)
251 0 : del5 = n3*del3
252 : END IF
253 :
254 0 : delta3 = del3/sigma3
255 0 : delta5 = del5/sigma5
256 0 : sigma3 = sigma3 + del3
257 0 : sigma5 = sigma5 + del5
258 0 : n3 = n3 + 1
259 0 : n3_3 = n3_3 - 1
260 :
261 : END DO loop2
262 :
263 : faddeyeva_accurate = v_old + y*a_pi*sigma3 + cmplxj*xsign*(sin_2yx &
264 0 : *exp_x_sqr*l_old + two_a_pi*half_a*sigma5)
265 :
266 : ELSE
267 0 : faddeyeva_accurate = one_sqrt_pi*((y + cmplxj*xsign*x)/(x_sqr + y_sqr))
268 : END IF outer
269 :
270 0 : IF (ysign < zero) THEN
271 0 : two_exp_x_sqr_ysqr = two*EXP(-x_sqr + y_sqr)
272 : faddeyeva_accurate = two_exp_x_sqr_ysqr*cos_2yx - REAL(faddeyeva_accurate) - cmplxj &
273 0 : *(-xsign*two_exp_x_sqr_ysqr*sin_2yx - AIMAG(faddeyeva_accurate))
274 : END IF
275 :
276 : END FUNCTION faddeyeva_accurate
277 :
278 : ! **************************************************************************************************
279 : !> \brief Computes the error function of a complex argument using the Zaghloul and Ali algorithm
280 : !> \param z complex argument
281 : !> \param err desired accuracy (positive)
282 : !> \return error function of z
283 : ! **************************************************************************************************
284 0 : elemental COMPLEX(kind=dp) FUNCTION erfz_accurate(z, err)
285 :
286 : ! This is an error function of a complex argument, which uses faddeyeva_accurate(z).
287 :
288 : COMPLEX(kind=dp), INTENT(in) :: z
289 : REAL(kind=dp), INTENT(in) :: err
290 :
291 0 : erfz_accurate = one - faddeyeva_accurate(cmplxj*z, err)*EXP(-z**2)
292 :
293 0 : END FUNCTION erfz_accurate
294 :
295 : ! **************************************************************************************************
296 : !> \brief Computes the Faddeyeva function w(z) = exp(-z**2) * erfc(-i*z)
297 : !> \param z complex argument
298 : !> \return Faddeyeva function w(z)
299 : ! **************************************************************************************************
300 1510738 : elemental COMPLEX(kind=dp) FUNCTION faddeyeva_fast(z)
301 :
302 : ! A modified version of algorithm 680, rewritten in Fortran 2008.
303 : ! G.P.M. Poppe, C.M.J. Wijers, More efficient computation of
304 : ! the complex error-function, ACM Trans. Math. Software 16:38-46, 1990.
305 : ! and
306 : ! G.P.M. Poppe, C.M.J. Wijers, Algorithm 680, Evaluation of the
307 : ! complex error function, ACM Trans. Math. Software 16:47, 1990.
308 : !
309 : ! Given a complex number z, this function computes
310 : ! the value of the Faddeeva-function w(z) = exp(-z**2)*erfc(-i*z),
311 : ! where erfc is the complex complementary error-function and i
312 : ! means sqrt(-1). The accuracy of the algorithm for z in the 1st
313 : ! and 2nd quadrant is 14 significant digits; in the 3rd and 4th
314 : ! it is 13 significant digits outside a circular region with radius
315 : ! 0.126 around a zero of the function.
316 :
317 : COMPLEX(kind=dp), INTENT(in) :: z
318 :
319 : REAL(kind=dp), PARAMETER :: factor = 1.12837916709551257388_dp
320 :
321 : INTEGER :: i, j, kapn, n, np1, nu
322 : LOGICAL :: a, b
323 : REAL(kind=dp) :: c, daux, h, h2, qlambda, qrho, rx, ry, &
324 : sx, sy, tx, ty, u, u1, u2, v, v1, v2, &
325 : w1, x, xabs, xabsq, xaux, xi, xquad, &
326 : xsum, y, yabs, yi, yquad, ysum
327 :
328 : ! factor is 2/sqrt(pi)
329 :
330 : ! To avoid the complier uninitialised varning
331 1510738 : h2 = zero
332 :
333 1510738 : xi = REAL(z)
334 1510738 : yi = AIMAG(z)
335 1510738 : xabs = ABS(xi)
336 1510738 : yabs = ABS(yi)
337 1510738 : x = xabs/6.3_dp
338 1510738 : y = yabs/4.4_dp
339 1510738 : qrho = x**2 + y**2
340 1510738 : xabsq = xabs**2
341 1510738 : xquad = xabsq - yabs**2
342 1510738 : yquad = 2*xabs*yabs
343 :
344 1510738 : a = qrho < 0.085264_dp
345 :
346 1510738 : branch1: IF (a) THEN
347 :
348 : ! If ( qrho .lt. 0.085264 ) then the Faddeeva-function is evaluated
349 : ! using a power-series (abramowitz/stegun, equation (7.1.5), p.297)
350 : ! n is the minimum number of terms needed to obtain the required
351 : ! accuracy
352 :
353 0 : qrho = (one - 0.85_dp*y)*SQRT(qrho)
354 0 : n = NINT(6.0_dp + 72.0_dp*qrho)
355 0 : j = 2*n + 1
356 0 : xsum = one/REAL(j, kind=dp)
357 0 : ysum = zero
358 :
359 0 : DO i = n, 1, -1
360 0 : j = j - 2
361 0 : xaux = (xsum*xquad - ysum*yquad)/REAL(i, kind=dp)
362 0 : ysum = (xsum*yquad + ysum*xquad)/REAL(i, kind=dp)
363 0 : xsum = xaux + one/REAL(j, kind=dp)
364 : END DO
365 :
366 0 : u1 = -factor*(xsum*yabs + ysum*xabs) + one
367 0 : v1 = factor*(xsum*xabs - ysum*yabs)
368 0 : daux = EXP(-xquad)
369 0 : u2 = daux*COS(yquad)
370 0 : v2 = -daux*SIN(yquad)
371 0 : u = u1*u2 - v1*v2
372 0 : v = u1*v2 + v1*u2
373 : ELSE
374 :
375 1510738 : bran2: IF (qrho > one) THEN
376 :
377 : ! If ( qrho .gt. 1) then w(z) is evaluated using the laplace
378 : ! continued fraction. nu is the minimum number of terms needed
379 : ! to obtain the required accuracy.
380 :
381 1509818 : h = zero
382 1509818 : kapn = 0
383 1509818 : qrho = SQRT(qrho)
384 : nu = INT(3.0_dp + (1442.0_dp/(26.0_dp*qrho &
385 1509818 : + 77.0_dp)))
386 :
387 : ELSE
388 :
389 : ! If ( qrho .ge. 0.085264 .and. qrho .le. one ) then
390 : ! w(z) is evaluated by a truncated Taylor expansion,
391 : ! where the Laplace continued fraction is used to calculate
392 : ! the derivatives of w(z). KAPN is the minimum number of terms
393 : ! in the Taylor expansion needed to obtain the required accuracy.
394 : ! NU is the minimum number of terms of the continued fraction
395 : ! needed to calculate the derivatives with the required accuracy.
396 :
397 920 : qrho = (one - y)*SQRT(one - qrho)
398 920 : h = 1.88_dp*qrho
399 920 : h2 = two*h
400 920 : kapn = NINT(7.0_dp + 34.0_dp*qrho)
401 920 : nu = NINT(16.0_dp + 26.0_dp*qrho)
402 :
403 : END IF bran2
404 :
405 1510738 : b = h > zero
406 :
407 : ! To avoid uninitialise compiler warning. qlambda is used
408 : ! only if (b), so can define to any value otherwise.
409 1510738 : qlambda = zero
410 1510738 : IF (b) qlambda = h2**kapn
411 :
412 : rx = zero
413 : ry = zero
414 : sx = zero
415 : sy = zero
416 :
417 22381324 : DO n = nu, 0, -1
418 20870586 : np1 = n + 1
419 20870586 : tx = yabs + h + np1*rx
420 20870586 : ty = xabs - np1*ry
421 20870586 : c = half/(tx**2 + ty**2)
422 20870586 : rx = c*tx
423 20870586 : ry = c*ty
424 22381324 : IF (b .AND. n <= kapn) THEN
425 7360 : tx = qlambda + sx
426 7360 : sx = rx*tx - ry*sy
427 7360 : sy = ry*tx + rx*sy
428 7360 : qlambda = qlambda/h2
429 : END IF
430 : END DO
431 :
432 1510738 : IF (h == zero) THEN
433 1509818 : u = factor*rx
434 1509818 : v = factor*ry
435 : ELSE
436 920 : u = factor*sx
437 920 : v = factor*sy
438 : END IF
439 :
440 1510738 : IF (yabs == zero) u = EXP(-xabs**2)
441 :
442 : END IF branch1
443 :
444 : ! Evaluation of w(z) in the other quadrants
445 :
446 1510738 : IF (yi < zero) THEN
447 :
448 0 : IF (a) THEN
449 0 : u2 = two*u2
450 0 : v2 = two*v2
451 : ELSE
452 0 : xquad = -xquad
453 0 : w1 = two*EXP(xquad)
454 0 : u2 = w1*COS(yquad)
455 0 : v2 = -w1*SIN(yquad)
456 : END IF
457 :
458 0 : u = u2 - u
459 0 : v = v2 - v
460 0 : IF (xi > zero) v = -v
461 : ELSE
462 1510738 : IF (xi < zero) v = -v
463 : END IF
464 :
465 1510738 : faddeyeva_fast = CMPLX(u, v, kind=dp)
466 :
467 1510738 : END FUNCTION faddeyeva_fast
468 :
469 : ! **************************************************************************************************
470 : !> \brief Computes the error function of a complex argument using the Poppe and Wijers algorithm
471 : !> \param z complex argument
472 : !> \return error function of z
473 : ! **************************************************************************************************
474 1510738 : elemental COMPLEX(kind=dp) FUNCTION erfz_fast(z)
475 :
476 : ! This is an error function of a complex argument, which uses faddeyeva_fast(z).
477 :
478 : COMPLEX(kind=dp), INTENT(in) :: z
479 :
480 1510738 : erfz_fast = one - faddeyeva_fast(cmplxj*z)*EXP(-z**2)
481 :
482 1510738 : END FUNCTION erfz_fast
483 :
484 : !*********************************************************************
485 : END MODULE erf_complex
|