Raymond Toy pushed to branch issue-459-more-accurate-dd-complex-div at cmucl / cmucl
Commits:
-
a6ea12d7
by Raymond Toy at 2025-12-13T08:32:38-08:00
-
8d0c23e1
by Raymond Toy at 2025-12-13T08:35:34-08:00
4 changed files:
Changes:
| ... | ... | @@ -1669,6 +1669,7 @@ |
| 1669 | 1669 | "TRUST-DYNAMIC-EXTENT-DECLARATION-P"
|
| 1670 | 1670 | "IR2-STACK-ALLOCATE"
|
| 1671 | 1671 | "%DYNAMIC-EXTENT" "%DYNAMIC-EXTENT-START" "%DYNAMIC-EXTENT-END")
|
| 1672 | + (:export "CDIV-DOUBLE-DOUBLE-FLOAT")
|
|
| 1672 | 1673 | )
|
| 1673 | 1674 | (defpackage "XREF"
|
| 1674 | 1675 | (:export "INIT-XREF-DATABASE"
|
| ... | ... | @@ -1918,6 +1919,8 @@ |
| 1918 | 1919 | (:import-from "LISP" "BOOLEAN")
|
| 1919 | 1920 | (:import-from "C-CALL" "VOID")
|
| 1920 | 1921 | (:import-from "CONDITIONS" "PARSE-UNKNOWN-TYPE-SPECIFIER")
|
| 1922 | + #+double-double
|
|
| 1923 | + (:import-from "C" "CDIV-DOUBLE-DOUBLE-FLOAT")
|
|
| 1921 | 1924 | (:shadow "CLASS" "STRUCTURE-CLASS" "BUILT-IN-CLASS" "STANDARD-CLASS"
|
| 1922 | 1925 | "FIND-CLASS" "CLASS-OF")
|
| 1923 | 1926 | (:export "%CLASS-LAYOUT" "%CLASS-STATE" "%CLASS-DIRECT-SUPERCLASSES"
|
| ... | ... | @@ -752,157 +752,6 @@ |
| 752 | 752 | (* s
|
| 753 | 753 | (robust-internal x y))))))
|
| 754 | 754 | |
| 755 | -;;; Same algorithm as for doubles, but the constants here are
|
|
| 756 | -;;; different. I'm guessing here for the appropriate values for
|
|
| 757 | -;;; +eps+, +rmin2+, and +rminscal+.
|
|
| 758 | -(defconstant +dd-eps+ (scale-float 1w0 -104))
|
|
| 759 | -(defconstant +dd-rmin+ least-positive-normalized-double-double-float)
|
|
| 760 | -(defconstant +dd-rbig+ (/ most-positive-double-double-float 2))
|
|
| 761 | -(defconstant +dd-rmin2+ (scale-float 1w0 -105))
|
|
| 762 | -(defconstant +dd-rminscal+ (scale-float 1w0 102))
|
|
| 763 | -(defconstant +dd-rmax2+ (* +dd-rbig+ +dd-rmin2+))
|
|
| 764 | -(defconstant +dd-be+ (/ 2 (* +dd-eps+ +dd-eps+)))
|
|
| 765 | -(defconstant +dd-2/eps+ (/ 2 +dd-eps+))
|
|
| 766 | - |
|
| 767 | -(defun cdiv-double-double-float (x y)
|
|
| 768 | - (declare (type (complex vm::double-double-float) x y)
|
|
| 769 | - (optimize (speed 3) (safety 0)))
|
|
| 770 | - (labels
|
|
| 771 | - ((internal-compreal (a b c d r tt)
|
|
| 772 | - (declare (vm::double-double-float a b c d r tt))
|
|
| 773 | - ;; Compute the real part of the complex division
|
|
| 774 | - ;; (a+ib)/(c+id), assuming |c| <= |d|. r = d/c and tt = 1/(c+d*r).
|
|
| 775 | - ;;
|
|
| 776 | - ;; The realpart is (a*c+b*d)/(c^2+d^2).
|
|
| 777 | - ;;
|
|
| 778 | - ;; c^2+d^2 = c*(c+d*(d/c)) = c*(c+d*r)
|
|
| 779 | - ;;
|
|
| 780 | - ;; Then
|
|
| 781 | - ;;
|
|
| 782 | - ;; (a*c+b*d)/(c^2+d^2) = (a*c+b*d)/(c*(c+d*r))
|
|
| 783 | - ;; = (a + b*d/c)/(c+d*r)
|
|
| 784 | - ;; = (a + b*r)/(c + d*r).
|
|
| 785 | - ;;
|
|
| 786 | - ;; Thus tt = (c + d*r).
|
|
| 787 | - (cond ((>= (abs r) +dd-rmin+)
|
|
| 788 | - (let ((br (* b r)))
|
|
| 789 | - (if (/= br 0)
|
|
| 790 | - (/ (+ a br) tt)
|
|
| 791 | - ;; b*r underflows. Instead, compute
|
|
| 792 | - ;;
|
|
| 793 | - ;; (a + b*r)*tt = a*tt + b*tt*r
|
|
| 794 | - ;; = a*tt + (b*tt)*r
|
|
| 795 | - ;; (a + b*r)/tt = a/tt + b/tt*r
|
|
| 796 | - ;; = a*tt + (b*tt)*r
|
|
| 797 | - (+ (/ a tt)
|
|
| 798 | - (* (/ b tt)
|
|
| 799 | - r)))))
|
|
| 800 | - (t
|
|
| 801 | - ;; r = 0 so d is very tiny compared to c.
|
|
| 802 | - ;;
|
|
| 803 | - (/ (+ a (* d (/ b c)))
|
|
| 804 | - tt))))
|
|
| 805 | - (robust-subinternal (a b c d)
|
|
| 806 | - (declare (vm::double-double-float a b c d))
|
|
| 807 | - (let* ((r (/ d c))
|
|
| 808 | - (tt (+ c (* d r))))
|
|
| 809 | - ;; e is the real part and f is the imaginary part. We
|
|
| 810 | - ;; can use internal-compreal for the imaginary part by
|
|
| 811 | - ;; noticing that the imaginary part of (a+i*b)/(c+i*d) is
|
|
| 812 | - ;; the same as the real part of (b-i*a)/(c+i*d).
|
|
| 813 | - (let ((e (internal-compreal a b c d r tt))
|
|
| 814 | - (f (internal-compreal b (- a) c d r tt)))
|
|
| 815 | - (values e
|
|
| 816 | - f))))
|
|
| 817 | -
|
|
| 818 | - (robust-internal (x y)
|
|
| 819 | - (declare (type (complex vm::double-double-float) x y))
|
|
| 820 | - (let ((a (realpart x))
|
|
| 821 | - (b (imagpart x))
|
|
| 822 | - (c (realpart y))
|
|
| 823 | - (d (imagpart y)))
|
|
| 824 | - (declare (vm::double-double-float a b c d))
|
|
| 825 | - (flet ((maybe-scale (abs-tst a b c d)
|
|
| 826 | - (declare (vm::double-double-float a b c d))
|
|
| 827 | - ;; This implements McGehearty's iteration 3 to
|
|
| 828 | - ;; handle the case when some values are too big
|
|
| 829 | - ;; and should be scaled down. Also if some
|
|
| 830 | - ;; values are too tiny, scale them up.
|
|
| 831 | - (let ((abs-a (abs a))
|
|
| 832 | - (abs-b (abs b)))
|
|
| 833 | - (if (or (> abs-tst +dd-rbig+)
|
|
| 834 | - (> abs-a +dd-rbig+)
|
|
| 835 | - (> abs-b +dd-rbig+))
|
|
| 836 | - (setf a (* a 0.5d0)
|
|
| 837 | - b (* b 0.5d0)
|
|
| 838 | - c (* c 0.5d0)
|
|
| 839 | - d (* d 0.5d0))
|
|
| 840 | - (if (< abs-tst +dd-rmin2+)
|
|
| 841 | - (setf a (* a +dd-rminscal+)
|
|
| 842 | - b (* b +dd-rminscal+)
|
|
| 843 | - c (* c +dd-rminscal+)
|
|
| 844 | - d (* d +dd-rminscal+))
|
|
| 845 | - (if (or (and (< abs-a +dd-rmin+)
|
|
| 846 | - (< abs-b +dd-rmax2+)
|
|
| 847 | - (< abs-tst +dd-rmax2+))
|
|
| 848 | - (and (< abs-b +dd-rmin+)
|
|
| 849 | - (< abs-a +dd-rmax2+)
|
|
| 850 | - (< abs-tst +dd-rmax2+)))
|
|
| 851 | - (setf a (* a +dd-rminscal+)
|
|
| 852 | - b (* b +dd-rminscal+)
|
|
| 853 | - c (* c +dd-rminscal+)
|
|
| 854 | - d (* d +dd-rminscal+)))))
|
|
| 855 | - (values a b c d))))
|
|
| 856 | - (cond
|
|
| 857 | - ((<= (abs d) (abs c))
|
|
| 858 | - ;; |d| <= |c|, so we can use robust-subinternal to
|
|
| 859 | - ;; perform the division.
|
|
| 860 | - (multiple-value-bind (a b c d)
|
|
| 861 | - (maybe-scale (abs c) a b c d)
|
|
| 862 | - (multiple-value-bind (e f)
|
|
| 863 | - (robust-subinternal a b c d)
|
|
| 864 | - (complex e f))))
|
|
| 865 | - (t
|
|
| 866 | - ;; |d| > |c|. So, instead compute
|
|
| 867 | - ;;
|
|
| 868 | - ;; (b + i*a)/(d + i*c) = ((b*d+a*c) + (a*d-b*c)*i)/(d^2+c^2)
|
|
| 869 | - ;;
|
|
| 870 | - ;; Compare this to (a+i*b)/(c+i*d) and we see that
|
|
| 871 | - ;; realpart of the former is the same, but the
|
|
| 872 | - ;; imagpart of the former is the negative of the
|
|
| 873 | - ;; desired division.
|
|
| 874 | - (multiple-value-bind (a b c d)
|
|
| 875 | - (maybe-scale (abs d) a b c d)
|
|
| 876 | - (multiple-value-bind (e f)
|
|
| 877 | - (robust-subinternal b a d c)
|
|
| 878 | - (complex e (- f))))))))))
|
|
| 879 | - (let* ((a (realpart x))
|
|
| 880 | - (b (imagpart x))
|
|
| 881 | - (c (realpart y))
|
|
| 882 | - (d (imagpart y))
|
|
| 883 | - (ab (max (abs a) (abs b)))
|
|
| 884 | - (cd (max (abs c) (abs d)))
|
|
| 885 | - (s 1w0))
|
|
| 886 | - (declare (vm::double-double-float s))
|
|
| 887 | - ;; If a or b is big, scale down a and b.
|
|
| 888 | - (when (>= ab +dd-rbig+)
|
|
| 889 | - (setf x (/ x 2)
|
|
| 890 | - s (* s 2)))
|
|
| 891 | - ;; If c or d is big, scale down c and d.
|
|
| 892 | - (when (>= cd +dd-rbig+)
|
|
| 893 | - (setf y (/ y 2)
|
|
| 894 | - s (/ s 2)))
|
|
| 895 | - ;; If a or b is tiny, scale up a and b.
|
|
| 896 | - (when (<= ab (* +dd-rmin+ +dd-2/eps+))
|
|
| 897 | - (setf x (* x +dd-be+)
|
|
| 898 | - s (/ s +dd-be+)))
|
|
| 899 | - ;; If c or d is tiny, scale up c and d.
|
|
| 900 | - (when (<= cd (* +dd-rmin+ +dd-2/eps+))
|
|
| 901 | - (setf y (* y +dd-be+)
|
|
| 902 | - s (* s +dd-be+)))
|
|
| 903 | - (* s
|
|
| 904 | - (robust-internal x y)))))
|
|
| 905 | - |
|
| 906 | 755 | ;; Smith's algorithm for complex division for (complex single-float).
|
| 907 | 756 | ;; We convert the parts to double-floats before computing the result.
|
| 908 | 757 | (defun cdiv-single-float (x y)
|
| ... | ... | @@ -948,7 +797,10 @@ |
| 948 | 797 | (((complex double-double-float)
|
| 949 | 798 | (foreach (complex rational) (complex single-float) (complex double-float)
|
| 950 | 799 | (complex double-double-float)))
|
| 800 | + (c::cdiv-double-double-float x
|
|
| 801 | + (coerce y '(complex double-double-float)))
|
|
| 951 | 802 | ;; We should do something better for double-double floats.
|
| 803 | + #+nil
|
|
| 952 | 804 | (let ((a (realpart x))
|
| 953 | 805 | (b (imagpart x))
|
| 954 | 806 | (c (realpart y))
|
| ... | ... | @@ -969,6 +821,9 @@ |
| 969 | 821 | (((foreach integer ratio single-float double-float double-double-float
|
| 970 | 822 | (complex rational) (complex single-float) (complex double-float))
|
| 971 | 823 | (complex double-double-float))
|
| 824 | + (c::cdiv-double-double-float (coerce x '(complex double-double-float))
|
|
| 825 | + y)
|
|
| 826 | + #+nil
|
|
| 972 | 827 | (let ((a (realpart x))
|
| 973 | 828 | (b (imagpart x))
|
| 974 | 829 | (c (realpart y))
|
| ... | ... | @@ -691,4 +691,166 @@ |
| 691 | 691 | (kernel:double-double-lo a)
|
| 692 | 692 | (kernel:double-double-hi b)
|
| 693 | 693 | (kernel:double-double-lo b)))
|
| 694 | + |
|
| 695 | +;; An implementation of Baudin and Smith's robust complex division for
|
|
| 696 | +;; double-floats. This is a pretty straightforward translation of the
|
|
| 697 | +;; original in https://arxiv.org/pdf/1210.4539.
|
|
| 698 | +;;
|
|
| 699 | +;; This also includes improvements mentioned in
|
|
| 700 | +;; https://lpc.events/event/11/contributions/1005/attachments/856/1625/Complex_divide.pdf.
|
|
| 701 | +;; In particular iteration 1 and 3 are added. Iteration 2 and 4 were
|
|
| 702 | +;; not added. The test examples from iteration 2 and 4 didn't change
|
|
| 703 | +;; with or without changes added.
|
|
| 704 | +(let* ((+dd-eps+ (scale-float 1w0 -104))
|
|
| 705 | + (+dd-rmin+ least-positive-normalized-double-double-float)
|
|
| 706 | + (+dd-rbig+ (/ most-positive-double-double-float 2))
|
|
| 707 | + (+dd-rmin2+ (scale-float 1w0 -105))
|
|
| 708 | + (+dd-rminscal+ (scale-float 1w0 102))
|
|
| 709 | + (+dd-rmax2+ (* +dd-rbig+ +dd-rmin2+))
|
|
| 710 | + (+dd-be+ (/ 2 (* +dd-eps+ +dd-eps+)))
|
|
| 711 | + (+dd-2/eps+ (/ 2 +dd-eps+)))
|
|
| 712 | + (declare (double-double-float +dd-eps+ +dd-rmin+ +dd-rbig+ +dd-rmin2+
|
|
| 713 | + +dd-rminscal+ +dd-rmax2+ +dd-be+ +dd-2/eps+))
|
|
| 714 | + (defun cdiv-double-double-float (x y)
|
|
| 715 | + (declare (type (complex double-double-float) x y)
|
|
| 716 | + (optimize (speed 2) (safety 0)))
|
|
| 717 | + (labels
|
|
| 718 | + ((internal-compreal (a b c d r tt)
|
|
| 719 | + (declare (double-double-float a b c d r tt))
|
|
| 720 | + ;; Compute the real part of the complex division
|
|
| 721 | + ;; (a+ib)/(c+id), assuming |c| <= |d|. r = d/c and tt = 1/(c+d*r).
|
|
| 722 | + ;;
|
|
| 723 | + ;; The realpart is (a*c+b*d)/(c^2+d^2).
|
|
| 724 | + ;;
|
|
| 725 | + ;; c^2+d^2 = c*(c+d*(d/c)) = c*(c+d*r)
|
|
| 726 | + ;;
|
|
| 727 | + ;; Then
|
|
| 728 | + ;;
|
|
| 729 | + ;; (a*c+b*d)/(c^2+d^2) = (a*c+b*d)/(c*(c+d*r))
|
|
| 730 | + ;; = (a + b*d/c)/(c+d*r)
|
|
| 731 | + ;; = (a + b*r)/(c + d*r).
|
|
| 732 | + ;;
|
|
| 733 | + ;; Thus tt = (c + d*r).
|
|
| 734 | + (cond ((>= (abs r) +dd-rmin+)
|
|
| 735 | + (let ((br (* b r)))
|
|
| 736 | + (if (/= br 0)
|
|
| 737 | + (/ (+ a br) tt)
|
|
| 738 | + ;; b*r underflows. Instead, compute
|
|
| 739 | + ;;
|
|
| 740 | + ;; (a + b*r)*tt = a*tt + b*tt*r
|
|
| 741 | + ;; = a*tt + (b*tt)*r
|
|
| 742 | + ;; (a + b*r)/tt = a/tt + b/tt*r
|
|
| 743 | + ;; = a*tt + (b*tt)*r
|
|
| 744 | + (+ (/ a tt)
|
|
| 745 | + (* (/ b tt)
|
|
| 746 | + r)))))
|
|
| 747 | + (t
|
|
| 748 | + ;; r = 0 so d is very tiny compared to c.
|
|
| 749 | + ;;
|
|
| 750 | + (/ (+ a (* d (/ b c)))
|
|
| 751 | + tt))))
|
|
| 752 | + (robust-subinternal (a b c d)
|
|
| 753 | + (declare (double-double-float a b c d))
|
|
| 754 | + (let* ((r (/ d c))
|
|
| 755 | + (tt (+ c (* d r))))
|
|
| 756 | + ;; e is the real part and f is the imaginary part. We
|
|
| 757 | + ;; can use internal-compreal for the imaginary part by
|
|
| 758 | + ;; noticing that the imaginary part of (a+i*b)/(c+i*d) is
|
|
| 759 | + ;; the same as the real part of (b-i*a)/(c+i*d).
|
|
| 760 | + (let ((e (internal-compreal a b c d r tt))
|
|
| 761 | + (f (internal-compreal b (- a) c d r tt)))
|
|
| 762 | + (values e
|
|
| 763 | + f))))
|
|
| 764 | +
|
|
| 765 | + (robust-internal (x y)
|
|
| 766 | + (declare (type (complex double-double-float) x y))
|
|
| 767 | + (let ((a (realpart x))
|
|
| 768 | + (b (imagpart x))
|
|
| 769 | + (c (realpart y))
|
|
| 770 | + (d (imagpart y)))
|
|
| 771 | + (declare (double-double-float a b c d))
|
|
| 772 | + (flet ((maybe-scale (abs-tst a b c d)
|
|
| 773 | + (declare (double-double-float a b c d))
|
|
| 774 | + ;; This implements McGehearty's iteration 3 to
|
|
| 775 | + ;; handle the case when some values are too big
|
|
| 776 | + ;; and should be scaled down. Also if some
|
|
| 777 | + ;; values are too tiny, scale them up.
|
|
| 778 | + (let ((abs-a (abs a))
|
|
| 779 | + (abs-b (abs b)))
|
|
| 780 | + (if (or (> abs-tst +dd-rbig+)
|
|
| 781 | + (> abs-a +dd-rbig+)
|
|
| 782 | + (> abs-b +dd-rbig+))
|
|
| 783 | + (setf a (* a 0.5d0)
|
|
| 784 | + b (* b 0.5d0)
|
|
| 785 | + c (* c 0.5d0)
|
|
| 786 | + d (* d 0.5d0))
|
|
| 787 | + (if (< abs-tst +dd-rmin2+)
|
|
| 788 | + (setf a (* a +dd-rminscal+)
|
|
| 789 | + b (* b +dd-rminscal+)
|
|
| 790 | + c (* c +dd-rminscal+)
|
|
| 791 | + d (* d +dd-rminscal+))
|
|
| 792 | + (if (or (and (< abs-a +dd-rmin+)
|
|
| 793 | + (< abs-b +dd-rmax2+)
|
|
| 794 | + (< abs-tst +dd-rmax2+))
|
|
| 795 | + (and (< abs-b +dd-rmin+)
|
|
| 796 | + (< abs-a +dd-rmax2+)
|
|
| 797 | + (< abs-tst +dd-rmax2+)))
|
|
| 798 | + (setf a (* a +dd-rminscal+)
|
|
| 799 | + b (* b +dd-rminscal+)
|
|
| 800 | + c (* c +dd-rminscal+)
|
|
| 801 | + d (* d +dd-rminscal+)))))
|
|
| 802 | + (values a b c d))))
|
|
| 803 | + (cond
|
|
| 804 | + ((<= (abs d) (abs c))
|
|
| 805 | + ;; |d| <= |c|, so we can use robust-subinternal to
|
|
| 806 | + ;; perform the division.
|
|
| 807 | + (multiple-value-bind (a b c d)
|
|
| 808 | + (maybe-scale (abs c) a b c d)
|
|
| 809 | + (multiple-value-bind (e f)
|
|
| 810 | + (robust-subinternal a b c d)
|
|
| 811 | + (complex e f))))
|
|
| 812 | + (t
|
|
| 813 | + ;; |d| > |c|. So, instead compute
|
|
| 814 | + ;;
|
|
| 815 | + ;; (b + i*a)/(d + i*c) = ((b*d+a*c) + (a*d-b*c)*i)/(d^2+c^2)
|
|
| 816 | + ;;
|
|
| 817 | + ;; Compare this to (a+i*b)/(c+i*d) and we see that
|
|
| 818 | + ;; realpart of the former is the same, but the
|
|
| 819 | + ;; imagpart of the former is the negative of the
|
|
| 820 | + ;; desired division.
|
|
| 821 | + (multiple-value-bind (a b c d)
|
|
| 822 | + (maybe-scale (abs d) a b c d)
|
|
| 823 | + (multiple-value-bind (e f)
|
|
| 824 | + (robust-subinternal b a d c)
|
|
| 825 | + (complex e (- f))))))))))
|
|
| 826 | + (let* ((a (realpart x))
|
|
| 827 | + (b (imagpart x))
|
|
| 828 | + (c (realpart y))
|
|
| 829 | + (d (imagpart y))
|
|
| 830 | + (ab (max (abs a) (abs b)))
|
|
| 831 | + (cd (max (abs c) (abs d)))
|
|
| 832 | + (s 1w0))
|
|
| 833 | + (declare (double-double-float s))
|
|
| 834 | + ;; If a or b is big, scale down a and b.
|
|
| 835 | + (when (>= ab +dd-rbig+)
|
|
| 836 | + (setf x (/ x 2)
|
|
| 837 | + s (* s 2)))
|
|
| 838 | + ;; If c or d is big, scale down c and d.
|
|
| 839 | + (when (>= cd +dd-rbig+)
|
|
| 840 | + (setf y (/ y 2)
|
|
| 841 | + s (/ s 2)))
|
|
| 842 | + ;; If a or b is tiny, scale up a and b.
|
|
| 843 | + (when (<= ab (* +dd-rmin+ +dd-2/eps+))
|
|
| 844 | + (setf x (* x +dd-be+)
|
|
| 845 | + s (/ s +dd-be+)))
|
|
| 846 | + ;; If c or d is tiny, scale up c and d.
|
|
| 847 | + (when (<= cd (* +dd-rmin+ +dd-2/eps+))
|
|
| 848 | + (setf y (* y +dd-be+)
|
|
| 849 | + s (* s +dd-be+)))
|
|
| 850 | + (* s
|
|
| 851 | + (robust-internal x y))))))
|
|
| 852 | + |
|
| 853 | +(deftransform / ((x y) ((complex double-double-float) (complex double-double-float))
|
|
| 854 | + *)
|
|
| 855 | + `(cdiv-double-double-float x y))
|
|
| 694 | 856 | ) ; end progn |
| ... | ... | @@ -509,9 +509,8 @@ |
| 509 | 509 | (complex (rational (realpart b))
|
| 510 | 510 | (rational (imagpart b))))
|
| 511 | 511 | '(complex ext:double-double-float))))
|
| 512 | - (let* ((z (kernel::cdiv-double-double-float
|
|
| 513 | - (coerce x '(complex ext:double-double-float))
|
|
| 514 | - (coerce y '(complex ext:double-double-float))))
|
|
| 512 | + (let* ((z (/ (coerce x '(complex ext:double-double-float))
|
|
| 513 | + (coerce y '(complex ext:double-double-float))))
|
|
| 515 | 514 | (z-true (compute-true x y))
|
| 516 | 515 | (rel (rel-err z z-true)))
|
| 517 | 516 | (assert-equal expected-rel-w
|