Raymond Toy pushed to branch master at cmucl / cmucl Commits: 3416f6d5 by Raymond Toy at 2026-01-08T08:13:59-08:00 Fix #456: More accurate complex division - - - - - f8d90cd0 by Raymond Toy at 2026-01-08T08:13:59-08:00 Merge branch 'issue-456-more-accurate-complex-div' into 'master' Fix #456: More accurate complex division Closes #456 See merge request cmucl/cmucl!335 - - - - - 4 changed files: - src/code/numbers.lisp - src/compiler/float-tran.lisp - src/general-info/release-22a.md - tests/float.lisp Changes: ===================================== src/code/numbers.lisp ===================================== @@ -593,26 +593,250 @@ (build-ratio x y) (build-ratio (truncate x gcd) (truncate y gcd)))))) +;; An implementation of Baudin and Smith's robust complex division for +;; double-floats. This is a pretty straightforward translation of the +;; original in https://arxiv.org/pdf/1210.4539. +;; +;; This also includes improvements mentioned in +;; https://lpc.events/event/11/contributions/1005/attachments/856/1625/Complex_.... +;; In particular iteration 1 and 3 are added. Iteration 2 and 4 were +;; not added. The test examples from iteration 2 and 4 didn't change +;; with or without changes added. +(defconstant +cdiv-rmin+ least-positive-normalized-double-float) +(defconstant +cdiv-rbig+ (/ most-positive-double-float 2)) +(defconstant +cdiv-rmin2+ (scale-float 1d0 -53)) +(defconstant +cdiv-rminscal+ (scale-float 1d0 51)) +(defconstant +cdiv-rmax2+ (* +cdiv-rbig+ +cdiv-rmin2+)) +;; This is the value of %eps from Scilab +(defconstant +cdiv-eps+ (scale-float 1d0 -52)) +(defconstant +cdiv-be+ (/ 2 (* +cdiv-eps+ +cdiv-eps+))) +(defconstant +cdiv-2/eps+ (/ 2 +cdiv-eps+)) + +;; Make these functions accessible. cdiv-double-float and +;; cdiv-single-float are used by deftransforms. Of course, two-arg-/ +;; is the interface to division. cdiv-generic isn't used anywhere +;; else. +(declaim (ext:start-block cdiv-double-float cdiv-single-float two-arg-/)) + +(defun cdiv-double-float (x y) + "Accurate division of complex double-float numbers x and y." + (declare (type (complex double-float) x y) + (optimize (speed 3) (safety 0))) + (labels + ((internal-compreal (a b c d r tt) + (declare (double-float a b c d r tt)) + ;; Compute the real part of the complex division + ;; (a+ib)/(c+id), assuming |c| <= |d|. r = d/c and tt = 1/(c+d*r). + ;; + ;; The realpart is (a*c+b*d)/(c^2+d^2). + ;; + ;; c^2+d^2 = c*(c+d*(d/c)) = c*(c+d*r) + ;; + ;; Then + ;; + ;; (a*c+b*d)/(c^2+d^2) = (a*c+b*d)/(c*(c+d*r)) + ;; = (a + b*d/c)/(c+d*r) + ;; = (a + b*r)/(c + d*r). + ;; + ;; Thus tt = (c + d*r). + (cond ((>= (abs r) +cdiv-rmin+) + (let ((br (* b r))) + (if (/= br 0) + (/ (+ a br) tt) + ;; b*r underflows. Instead, compute + ;; + ;; (a + b*r)*tt = a*tt + b*tt*r + ;; = a*tt + (b*tt)*r + ;; (a + b*r)/tt = a/tt + b/tt*r + ;; = a*tt + (b*tt)*r + (+ (/ a tt) + (* (/ b tt) + r))))) + (t + ;; r = 0 so d is very tiny compared to c. + ;; + (/ (+ a (* d (/ b c))) + tt)))) + (robust-subinternal (a b c d) + (declare (double-float a b c d)) + (let* ((r (/ d c)) + (tt (+ c (* d r)))) + ;; e is the real part and f is the imaginary part. We + ;; can use internal-compreal for the imaginary part by + ;; noticing that the imaginary part of (a+i*b)/(c+i*d) is + ;; the same as the real part of (b-i*a)/(c+i*d). + (let ((e (internal-compreal a b c d r tt)) + (f (internal-compreal b (- a) c d r tt))) + (values e + f)))) + + (robust-internal (x y) + (declare (type (complex double-float) x y)) + (let ((a (realpart x)) + (b (imagpart x)) + (c (realpart y)) + (d (imagpart y))) + (declare (double-float a b c d)) + (flet ((maybe-scale (abs-tst a b c d) + (declare (double-float a b c d)) + ;; This implements McGehearty's iteration 3 to + ;; handle the case when some values are too big + ;; and should be scaled down. Also if some + ;; values are too tiny, scale them up. + (let ((abs-a (abs a)) + (abs-b (abs b))) + (if (or (> abs-tst +cdiv-rbig+) + (> abs-a +cdiv-rbig+) + (> abs-b +cdiv-rbig+)) + (setf a (* a 0.5d0) + b (* b 0.5d0) + c (* c 0.5d0) + d (* d 0.5d0)) + (if (< abs-tst +cdiv-rmin2+) + (setf a (* a +cdiv-rminscal+) + b (* b +cdiv-rminscal+) + c (* c +cdiv-rminscal+) + d (* d +cdiv-rminscal+)) + (if (or (and (< abs-a +cdiv-rmin+) + (< abs-b +cdiv-rmax2+) + (< abs-tst +cdiv-rmax2+)) + (and (< abs-b +cdiv-rmin+) + (< abs-a +cdiv-rmax2+) + (< abs-tst +cdiv-rmax2+))) + (setf a (* a +cdiv-rminscal+) + b (* b +cdiv-rminscal+) + c (* c +cdiv-rminscal+) + d (* d +cdiv-rminscal+))))) + (values a b c d)))) + (cond + ((<= (abs d) (abs c)) + ;; |d| <= |c|, so we can use robust-subinternal to + ;; perform the division. + (multiple-value-bind (a b c d) + (maybe-scale (abs c) a b c d) + (multiple-value-bind (e f) + (robust-subinternal a b c d) + (complex e f)))) + (t + ;; |d| > |c|. So, instead compute + ;; + ;; (b + i*a)/(d + i*c) = ((b*d+a*c) + (a*d-b*c)*i)/(d^2+c^2) + ;; + ;; Compare this to (a+i*b)/(c+i*d) and we see that + ;; realpart of the former is the same, but the + ;; imagpart of the former is the negative of the + ;; desired division. + (multiple-value-bind (a b c d) + (maybe-scale (abs d) a b c d) + (multiple-value-bind (e f) + (robust-subinternal b a d c) + (complex e (- f)))))))))) + (let* ((a (realpart x)) + (b (imagpart x)) + (c (realpart y)) + (d (imagpart y)) + (ab (max (abs a) (abs b))) + (cd (max (abs c) (abs d))) + (s 1d0)) + (declare (double-float s)) + ;; If a or b is big, scale down a and b. + (when (>= ab +cdiv-rbig+) + (setf x (/ x 2) + s (* s 2))) + ;; If c or d is big, scale down c and d. + (when (>= cd +cdiv-rbig+) + (setf y (/ y 2) + s (/ s 2))) + ;; If a or b is tiny, scale up a and b. + (when (<= ab (* +cdiv-rmin+ +cdiv-2/eps+)) + (setf x (* x +cdiv-be+) + s (/ s +cdiv-be+))) + ;; If c or d is tiny, scale up c and d. + (when (<= cd (* +cdiv-rmin+ +cdiv-2/eps+)) + (setf y (* y +cdiv-be+) + s (* s +cdiv-be+))) + (* s + (robust-internal x y))))) + +;; Smith's algorithm for complex division for (complex single-float). +;; We convert the parts to double-floats before computing the result. +(defun cdiv-single-float (x y) + "Accurate division of complex single-float numbers x and y." + (declare (type (complex single-float) x y)) + (let ((a (float (realpart x) 1d0)) + (b (float (imagpart x) 1d0)) + (c (float (realpart y) 1d0)) + (d (float (imagpart y) 1d0))) + (cond ((< (abs c) (abs d)) + (let* ((r (/ c d)) + (denom (+ (* c r) d)) + (e (float (/ (+ (* a r) b) denom) 1f0)) + (f (float (/ (- (* b r) a) denom) 1f0))) + (complex e f))) + (t + (let* ((r (/ d c)) + (denom (+ c (* d r))) + (e (float (/ (+ a (* b r)) denom) 1f0)) + (f (float (/ (- b (* a r)) denom) 1f0))) + (complex e f)))))) + +;; Generic implementation of Smith's algorithm. +(defun cdiv-generic (x y) + "Complex division of generic numbers x and y. One of x or y should be + a complex." + (let ((a (realpart x)) + (b (imagpart x)) + (c (realpart y)) + (d (imagpart y))) + (cond ((< (abs c) (abs d)) + (let* ((r (/ c d)) + (denom (+ (* c r) d)) + (e (/ (+ (* a r) b) denom)) + (f (/ (- (* b r) a) denom))) + (canonical-complex e f))) + (t + (let* ((r (/ d c)) + (denom (+ c (* d r))) + (e (/ (+ a (* b r)) denom)) + (f (/ (- b (* a r)) denom))) + (canonical-complex e f)))))) (defun two-arg-/ (x y) (number-dispatch ((x number) (y number)) (float-contagion / x y (ratio integer)) - - ((complex complex) - (let* ((rx (realpart x)) - (ix (imagpart x)) - (ry (realpart y)) - (iy (imagpart y))) - (if (> (abs ry) (abs iy)) - (let* ((r (/ iy ry)) - (dn (+ ry (* r iy)))) - (canonical-complex (/ (+ rx (* ix r)) dn) - (/ (- ix (* rx r)) dn))) - (let* ((r (/ ry iy)) - (dn (+ iy (* r ry)))) - (canonical-complex (/ (+ (* rx r) ix) dn) - (/ (- (* ix r) rx) dn)))))) - (((foreach integer ratio single-float double-float) complex) + + (((complex single-float) + (foreach (complex rational) (complex single-float))) + (cdiv-single-float x (coerce y '(complex single-float)))) + (((complex double-float) + (foreach (complex rational) (complex single-float) (complex double-float))) + (cdiv-double-float x (coerce y '(complex double-float)))) + + (((foreach integer ratio single-float (complex rational)) + (complex single-float)) + (cdiv-single-float (coerce x '(complex single-float)) + y)) + + (((foreach integer ratio single-float double-float (complex rational) + (complex single-float)) + (complex double-float)) + (cdiv-double-float (coerce x '(complex double-float)) + y)) + (((complex double-double-float) + (foreach (complex rational) (complex single-float) (complex double-float) + (complex double-double-float))) + ;; We should do something better for double-double floats. + (cdiv-generic x y)) + + (((foreach integer ratio single-float double-float double-double-float + (complex rational) (complex single-float) (complex double-float)) + (complex double-double-float)) + (cdiv-generic x y)) + + (((foreach integer ratio single-float double-float double-double-float) + (complex rational)) + ;; Smith's algorithm, but takes advantage of the fact that the + ;; numerator is a real number and not complex. (let* ((ry (realpart y)) (iy (imagpart y))) (if (> (abs ry) (abs iy)) @@ -624,10 +848,23 @@ (dn (* iy (+ 1 (* r r))))) (canonical-complex (/ (* x r) dn) (/ (- x) dn)))))) - ((complex (or rational float)) + (((complex rational) + (complex rational)) + ;; We probably don't need to do Smith's algorithm for rationals. + ;; A naive implementation of coplex division has no issues. + (cdiv-generic x y)) + + (((foreach (complex rational) (complex single-float) (complex double-float) + (complex double-double-float)) + (or rational float)) (canonical-complex (/ (realpart x) y) (/ (imagpart x) y))) - + + ((double-float + (complex single-float)) + (cdiv-double-float (coerce x '(complex double-float)) + (coerce y '(complex double-float)))) + ((ratio ratio) (let* ((nx (numerator x)) (dx (denominator x)) @@ -656,6 +893,7 @@ (build-ratio (maybe-truncate nx gcd) (* (maybe-truncate y gcd) (denominator x))))))) +(declaim (ext:end-block)) (defun %negate (n) (number-dispatch ((n number)) ===================================== src/compiler/float-tran.lisp ===================================== @@ -1804,46 +1804,13 @@ #+double-double (frob double-double-float)) -#+(and sse2 complex-fp-vops) (macrolet - ((frob (type one) - `(deftransform / ((x y) (,type ,type) * - :policy (> speed space)) - ;; Divide a complex by a complex + ((frob (type name) + `(deftransform / ((x y) ((complex ,type) (complex ,type)) *) + (,name x y)))) + (frob double-float kernel::cdiv-double-float) + (frob single-float kernel::cdiv-single-float)) - ;; Here's how we do a complex division - ;; - ;; Compute (xr + i*xi)/(yr + i*yi) - ;; - ;; Assume |yi| < |yr|. Then - ;; - ;; (xr + i*xi) (xr + i*xi) - ;; ----------- = ----------------- - ;; (yr + i*yi) yr*(1 + i*(yi/yr)) - ;; - ;; (xr + i*xi)*(1 - i*(yi/yr)) - ;; = --------------------------- - ;; yr*(1 + (yi/yr)^2) - ;; - ;; (xr + i*xi)*(1 - i*(yi/yr)) - ;; = --------------------------- - ;; yr + (yi/yr)*yi - ;; - ;; This allows us to use a fast complex multiply followed by - ;; a real division. - '(let* ((ry (realpart y)) - (iy (imagpart y))) - (if (> (abs ry) (abs iy)) - (let* ((r (/ iy ry)) - (dn (+ ry (* r iy)))) - (/ (* x (complex ,one r)) - dn)) - (let* ((r (/ ry iy)) - (dn (+ iy (* r ry)))) - (/ (* x (complex r ,(- one))) - dn))))))) - (frob (complex single-float) 1f0) - (frob (complex double-float) 1d0)) ;;;; Complex contagion: ===================================== src/general-info/release-22a.md ===================================== @@ -34,6 +34,9 @@ public domain. * #446: Use C compiler to get errno values to update UNIX defpackage with errno symbols * #453: Use correct flags for analyzer and always save logs. + * #456: Improve accuracy for division of complex double-floats + using Baudin and Smith's robust complex division algorithm + with improvements by Patrick McGehearty. * #458: Spurious overflow in double-double-float multiply * Other changes: * Improvements to the PCL implementation of CLOS: ===================================== tests/float.lisp ===================================== @@ -348,3 +348,211 @@ (:tag :issues) (assert-equal -2w300 (* -2w300 1w0))) + + + +;; Rudimentary code to read C %a formatted numbers that look like +;; "-0x1.c4dba4ba1ee79p-620". We assume STRING is exactly in this +;; format. No error-checking is done. +(defun parse-hex-float (string) + (let* ((sign (if (char= (aref string 0) #\-) + -1 + 1)) + (dot-posn (position #\. string)) + (p-posn (position #\p string)) + (lead (parse-integer string :start (1- dot-posn) :end dot-posn)) + (frac (parse-integer string :start (1+ dot-posn) :end p-posn :radix 16)) + (exp (parse-integer string :start (1+ p-posn)))) + (* sign + (scale-float (float (+ (ash lead 52) + frac) + 1d0) + (- exp 52))))) + +;; Relative error in terms of bits of accuracy. This is the +;; definition used by Baudin and Smith. A result of 53 means the two +;; numbers have identical bits. For complex numbers, we use the min +;; of the bits of accuracy of the real and imaginary parts. +(defun rel-err (computed expected) + (flet ((rerr (c e) + (let ((diff (abs (- c e)))) + (if (zerop diff) + (float-digits diff) + (floor (- (log (/ diff (abs e)) 2d0))))))) + (min (rerr (realpart computed) (realpart expected)) + (rerr (imagpart computed) (imagpart expected))))) + +(defun do-cdiv-test (x y z-true expected-rel) + (let* ((z (/ x y)) + (rel (rel-err z z-true))) + (assert-equal expected-rel + rel + x y z z-true rel))) +;; Issue #456: improve accuracy of division of complex double-floats. +;; +;; Tests for complex division. Tests 1-10 are from Baudin and Smith. +;; Test 11 is an example from Maxima. Test 12 is an example from the +;; ansi-tests where (/ z z) didn't produce exactly 1. Tests 13-16 are +;; for examples for improvement iterations 1-4 from McGehearty. +(macrolet + ((frob (name x y z-true rel) + `(define-test ,name + (:tag :issues) + (do-cdiv-test ,x ,y ,z-true ,rel)))) + ;; First cases are from Baudin and Smith + ;; 1 + (frob cdiv.baudin-case.1 + (complex 1d0 1d0) + (complex 1d0 (scale-float 1d0 1023)) + (complex (scale-float 1d0 -1023) + (scale-float -1d0 -1023)) + 53) + ;; 2 + (frob cdiv.baudin-case.2 + (complex 1d0 1d0) + (complex (scale-float 1d0 -1023) (scale-float 1d0 -1023)) + (complex (scale-float 1d0 1023) 0) + 53) + ;; 3 + (frob cdiv.baudin-case.3 + (complex (scale-float 1d0 1023) (scale-float 1d0 -1023)) + (complex (scale-float 1d0 677) (scale-float 1d0 -677)) + (complex (scale-float 1d0 346) (scale-float -1d0 -1008)) + 53) + ;; 4 + (frob cdiv.baudin-case.4.overflow + (complex (scale-float 1d0 1023) (scale-float 1d0 1023)) + (complex 1d0 1d0) + (complex (scale-float 1d0 1023) 0) + 53) + ;; 5 + (frob cdiv.baudin-case.5.underflow-ratio + (complex (scale-float 1d0 1020) (scale-float 1d0 -844)) + (complex (scale-float 1d0 656) (scale-float 1d0 -780)) + (complex (scale-float 1d0 364) (scale-float -1d0 -1072)) + 53) + ;; 6 + (frob cdiv.baudin-case.6.underflow-realpart + (complex (scale-float 1d0 -71) (scale-float 1d0 1021)) + (complex (scale-float 1d0 1001) (scale-float 1d0 -323)) + (complex (scale-float 1d0 -1072) (scale-float 1d0 20)) + 53) + ;; 7 + (frob cdiv.baudin-case.7.overflow-both-parts + (complex (scale-float 1d0 -347) (scale-float 1d0 -54)) + (complex (scale-float 1d0 -1037) (scale-float 1d0 -1058)) + (complex 3.898125604559113300d289 8.174961907852353577d295) + 53) + ;; 8 + (frob cdiv.baudin-case.8 + (complex (scale-float 1d0 -1074) (scale-float 1d0 -1074)) + (complex (scale-float 1d0 -1073) (scale-float 1d0 -1074)) + (complex 0.6d0 0.2d0) + 53) + ;; 9 + (frob cdiv.baudin-case.9 + (complex (scale-float 1d0 1015) (scale-float 1d0 -989)) + (complex (scale-float 1d0 1023) (scale-float 1d0 1023)) + (complex 0.001953125d0 -0.001953125d0) + 53) + ;; 10 + (frob cdiv.baudin-case.10.improve-imagpart-accuracy + (complex (scale-float 1d0 -622) (scale-float 1d0 -1071)) + (complex (scale-float 1d0 -343) (scale-float 1d0 -798)) + (complex 1.02951151789360578d-84 6.97145987515076231d-220) + 53) + ;; 11 + ;; + ;; From Maxima. This was from a (private) email where Maxima used + ;; CL:/ to compute the ratio but was not very accurate. + (frob cdiv.maxima-case + #c(5.43d-10 1.13d-100) + #c(1.2d-311 5.7d-312) + #c(3.691993880674614517999740937026568563794896024143749539711267954d301 + -1.753697093319947872394996242210428954266103103602859195409591583d301) + 52) + ;; 12 + ;; + ;; Found by ansi tests. z/z should be exactly 1. + (frob cdiv.ansi-test-z/z + #c(1.565640716292489d19 0.0d0) + #c(1.565640716292489d19 0.0d0) + #c(1d0 0) + 53) + ;; 13 + ;; Iteration 1. Without this, we would instead return + ;; + ;; (complex (parse-hex-float "0x1.ba8df8075bceep+155") + ;; (parse-hex-float "-0x1.a4ad6329485f0p-895")) + ;; + ;; whose imaginary part is quite a bit off. + (frob cdiv.mcgehearty-iteration.1 + (complex (parse-hex-float "0x1.73a3dac1d2f1fp+509") + (parse-hex-float "-0x1.c4dba4ba1ee79p-620")) + (complex (parse-hex-float "0x1.adf526c249cf0p+353") + (parse-hex-float "0x1.98b3fbc1677bbp-697")) + (complex (parse-hex-float "0x1.BA8DF8075BCEEp+155") + (parse-hex-float "-0x1.A4AD628DA5B74p-895")) + 53) + ;; 14 + ;; Iteration 2. + (frob cdiv.mcgehearty-iteration.2 + (complex (parse-hex-float "-0x0.000000008e4f8p-1022") + (parse-hex-float "0x0.0000060366ba7p-1022")) + (complex (parse-hex-float "-0x1.605b467369526p-245") + (parse-hex-float "0x1.417bd33105808p-256")) + (complex (parse-hex-float "0x1.cde593daa4ffep-810") + (parse-hex-float "-0x1.179b9a63df6d3p-799")) + 52) + ;; 15 + ;; Iteration 3 + (frob cdiv.mcgehearty-iteration.3 + (complex (parse-hex-float "0x1.cb27eece7c585p-355 ") + (parse-hex-float "0x0.000000223b8a8p-1022")) + (complex (parse-hex-float "-0x1.74e7ed2b9189fp-22") + (parse-hex-float "0x1.3d80439e9a119p-731")) + (complex (parse-hex-float "-0x1.3b35ed806ae5ap-333") + (parse-hex-float "-0x0.05e01bcbfd9f6p-1022")) + 53) + ;; 16 + ;; Iteration 4 + (frob cdiv.mcgehearty-iteration.4 + (complex (parse-hex-float "-0x1.f5c75c69829f0p-530") + (parse-hex-float "-0x1.e73b1fde6b909p+316")) + (complex (parse-hex-float "-0x1.ff96c3957742bp+1023") + (parse-hex-float "0x1.5bd78c9335899p+1021")) + (complex (parse-hex-float "-0x1.423c6ce00c73bp-710") + (parse-hex-float "0x1.d9edcf45bcb0ep-708")) + 52)) + +(define-test complex-division.misc + (:tag :issue) + (let ((num '(1 + 1/2 + 1.0 + 1d0 + #c(1 2) + #c(1.0 2.0) + #c(1d0 2d0) + #c(1w0 2w0)))) + ;; Try all combinations of divisions of different types. This is + ;; primarily to test that we got all the numeric contagion cases + ;; for division in CL:/. + (dolist (x num) + (dolist (y num) + (assert-true (/ x y) + x y))))) + +(define-test complex-division.single + (:tag :issues) + (let* ((x #c(1 2)) + (y (complex (expt 2 127) (expt 2 127))) + (expected (coerce (/ x y) + '(complex single-float)))) + ;; A naive implementation of complex division would cause an + ;; overflow in computing the denominator. + (assert-equal expected + (/ (coerce x '(complex single-float)) + (coerce y '(complex single-float))) + x + y))) View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/97824b42c485a2316a2c91d... -- View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/97824b42c485a2316a2c91d... You're receiving this email because of your account on gitlab.common-lisp.net.
participants (1)
-
Raymond Toy (@rtoy)