Raymond Toy pushed to branch issue-461-cdiv-use-4-doubles at cmucl / cmucl

Commits:

1 changed file:

Changes:

  • src/code/numbers.lisp
    ... ... @@ -612,25 +612,33 @@
    612 612
     (defconstant +cdiv-be+ (/ 2 (* +cdiv-eps+ +cdiv-eps+)))
    
    613 613
     (defconstant +cdiv-2/eps+ (/ 2 +cdiv-eps+))
    
    614 614
     
    
    615
    -;; Same constants but for double-double-floats.  Some of these aren't
    
    616
    -;; well-defined for double-double-floats so we make our best guess at
    
    615
    +;; Same constants but for DOUBLE-DOUBLE-FLOATS.  Some of these aren't
    
    616
    +;; well-defined for DOUBLE-DOUBLE-FLOATS so we make our best guess at
    
    617 617
     ;; what they might be.  Since double-doubles have about twice as many
    
    618
    -;; bits of precision as a double-float, we generally just double the
    
    619
    -;; exponent of the corresponding double-float values above.
    
    618
    +;; bits of precision as a DOUBLE-FLOAT, we generally just double the
    
    619
    +;; exponent (for SCALE-FLOAT) of the corresponding DOUBLE-FLOAT values
    
    620
    +;; above.
    
    621
    +;;
    
    622
    +;; Note also that both LEAST-POSITIVE-NORMALIZED-DOUBLE-DOUBLE-FLOAT
    
    623
    +;; and MOST-POSITIVE-DOUBLE-DOUBLE-FLOAT have a low component of 0, so
    
    624
    +;; there's no loss of precision if we use the corresponding
    
    625
    +;; DOUBLE-FLOAT values.  Likewise, all the other constants here are
    
    626
    +;; powers of 2, so the DOUBLE-DOUBLE-FLOAT values can be represented
    
    627
    +;; exactly as a DOUBLE-FLOAT.  We can use DOUBLE-FLOAT values.
    
    620 628
     (defconstant +cdiv-dd-rmin+
    
    621
    -  least-positive-normalized-double-double-float)
    
    629
    +  least-positive-normalized-double-float)
    
    622 630
     (defconstant +cdiv-dd-rbig+
    
    623
    -  (/ most-positive-double-double-float 2))
    
    631
    +  (/ most-positive-double-float 2))
    
    624 632
     (defconstant +cdiv-dd-rmin2+
    
    625
    -  (scale-float 1w0 -106))
    
    633
    +  (scale-float 1d0 -106))
    
    626 634
     (defconstant +cdiv-dd-rminscal+
    
    627
    -  (scale-float 1w0 102))
    
    635
    +  (scale-float 1d0 102))
    
    628 636
     (defconstant +cdiv-dd-rmax2+
    
    629 637
       (* +cdiv-dd-rbig+ +cdiv-dd-rmin2+))
    
    630 638
     ;; Epsilon for double-doubles isn't really well-defined because things
    
    631 639
     ;; like (+ 1w0 1w-200) is a valid double-double float.
    
    632 640
     (defconstant +cdiv-dd-eps+
    
    633
    -  (scale-float 1w0 -104))
    
    641
    +  (scale-float 1d0 -104))
    
    634 642
     (defconstant +cdiv-dd-be+
    
    635 643
       (/ 2 (* +cdiv-dd-eps+ +cdiv-dd-eps+)))
    
    636 644
     (defconstant +cdiv-dd-2/eps+
    
    ... ... @@ -648,161 +656,174 @@
    648 656
     ;; The code for both the double-float and double-double-float
    
    649 657
     ;; implementation is basically identical except for the constants.
    
    650 658
     ;; use a macro to generate both versions.
    
    651
    -(macrolet
    
    652
    -    ((frob (type)
    
    653
    -       (let* ((dd (if (eq type 'double-float)
    
    654
    -		      ""
    
    655
    -		      "DD-"))
    
    656
    -	      (name (symbolicate "CDIV-" type))
    
    657
    -	      (docstring (let ((*print-case* :downcase))
    
    658
    -			   (format nil "Accurate division of complex ~A numbers x and y."
    
    659
    -				   type)))
    
    660
    -	      ;; Create the correct symbols for the constants we need,
    
    661
    -	      ;; as defined above.
    
    662
    -	      (+rmin+ (symbolicate "+CDIV-" dd "RMIN+"))
    
    663
    -	      (+rbig+ (symbolicate "+CDIV-" dd "RBIG+"))
    
    664
    -	      (+rmin2+ (symbolicate "+CDIV-" dd "RMIN2+"))
    
    665
    -	      (+rminscal+ (symbolicate "+CDIV-" dd "RMINSCAL+"))
    
    666
    -	      (+rmax2+ (symbolicate "+CDIV-" dd "RMAX2+"))
    
    667
    -	      (+eps+ (symbolicate "+CDIV-" dd "EPS+"))
    
    668
    -	      (+be+ (symbolicate "+CDIV-" dd "BE+"))
    
    669
    -	      (+2/eps+ (symbolicate "+CDIV-" dd "2/EPS+")))
    
    670
    -	 `(defun ,name (xr xi yr yi)
    
    671
    -	    ,docstring
    
    672
    -	    (declare (,type xr xi yr yi)
    
    673
    -		     (optimize (speed 3) (safety 0)))
    
    674
    -	    (labels
    
    675
    -		((internal-compreal (a b c d r tt)
    
    676
    -		   (declare (,type a b c d r tt))
    
    677
    -		   ;; Compute the real part of the complex division
    
    678
    -		   ;; (a+ib)/(c+id), assuming |c| <= |d|.  r = d/c and tt = 1/(c+d*r).
    
    679
    -		   ;;
    
    680
    -		   ;; The realpart is (a*c+b*d)/(c^2+d^2).
    
    681
    -		   ;;
    
    682
    -		   ;;   c^2+d^2 = c*(c+d*(d/c)) = c*(c+d*r)
    
    683
    -		   ;;
    
    684
    -		   ;; Then
    
    685
    -		   ;;
    
    686
    -		   ;;   (a*c+b*d)/(c^2+d^2) = (a*c+b*d)/(c*(c+d*r))
    
    687
    -		   ;;                       = (a + b*d/c)/(c+d*r)
    
    688
    -		   ;;                       = (a + b*r)/(c + d*r).
    
    689
    -		   ;;
    
    690
    -		   ;; Thus tt = (c + d*r).
    
    691
    -		   (cond ((>= (abs r) ,+rmin+)
    
    692
    -			  (let ((br (* b r)))
    
    693
    -			    (if (/= br 0)
    
    694
    -				(/ (+ a br) tt)
    
    695
    -				;; b*r underflows.  Instead, compute
    
    696
    -				;;
    
    697
    -				;; (a + b*r)*tt = a*tt + b*tt*r
    
    698
    -				;;              = a*tt + (b*tt)*r
    
    699
    -				;; (a + b*r)/tt = a/tt + b/tt*r
    
    700
    -				;;              = a*tt + (b*tt)*r
    
    701
    -				(+ (/ a tt)
    
    702
    -				   (* (/ b tt)
    
    703
    -				      r)))))
    
    704
    -			 (t
    
    705
    -			  ;; r = 0 so d is very tiny compared to c.
    
    706
    -			  ;;
    
    707
    -			  (/ (+ a (* d (/ b c)))
    
    708
    -			     tt))))
    
    709
    -		 (robust-subinternal (a b c d)
    
    710
    -		   (declare (,type a b c d))
    
    711
    -		   (let* ((r (/ d c))
    
    712
    -			  (tt (+ c (* d r))))
    
    713
    -		     ;; e is the real part and f is the imaginary part.  We
    
    714
    -		     ;; can use internal-compreal for the imaginary part by
    
    715
    -		     ;; noticing that the imaginary part of (a+i*b)/(c+i*d) is
    
    716
    -		     ;; the same as the real part of (b-i*a)/(c+i*d).
    
    717
    -		     (let ((e (internal-compreal a b c d r tt))
    
    718
    -			   (f (internal-compreal b (- a) c d r tt)))
    
    719
    -		       (values e
    
    720
    -			       f))))
    
    721
    -		 (robust-internal (a b c d)
    
    722
    -		   (declare (type ,type a b c d))
    
    723
    -		   (flet ((maybe-scale (abs-tst a b c d)
    
    724
    -			    (declare (,type a b c d))
    
    725
    -			    ;; This implements McGehearty's iteration 3 to
    
    726
    -			    ;; handle the case when some values are too big
    
    727
    -			    ;; and should be scaled down.  Also if some
    
    728
    -			    ;; values are too tiny, scale them up.
    
    729
    -			    (let ((abs-a (abs a))
    
    730
    -				  (abs-b (abs b)))
    
    731
    -			      (if (or (> abs-tst ,+rbig+)
    
    732
    -				      (> abs-a ,+rbig+)
    
    733
    -				      (> abs-b ,+rbig+))
    
    734
    -				  (setf a (* a 0.5d0)
    
    735
    -					b (* b 0.5d0)
    
    736
    -					c (* c 0.5d0)
    
    737
    -					d (* d 0.5d0))
    
    738
    -				  (if (< abs-tst ,+rmin2+)
    
    739
    -				      (setf a (* a ,+rminscal+)
    
    740
    -					    b (* b ,+rminscal+)
    
    741
    -					    c (* c ,+rminscal+)
    
    742
    -					    d (* d ,+rminscal+))
    
    743
    -				      (if (or (and (< abs-a ,+rmin+)
    
    744
    -						   (< abs-b ,+rmax2+)
    
    745
    -						   (< abs-tst ,+rmax2+))
    
    746
    -					      (and (< abs-b ,+rmin+)
    
    747
    -						   (< abs-a ,+rmax2+)
    
    748
    -						   (< abs-tst ,+rmax2+)))
    
    749
    -					  (setf a (* a ,+rminscal+)
    
    750
    -						b (* b ,+rminscal+)
    
    751
    -						c (* c ,+rminscal+)
    
    752
    -						d (* d ,+rminscal+)))))
    
    753
    -			      (values a b c d))))
    
    754
    -		     (cond
    
    755
    -		       ((<= (abs d) (abs c))
    
    756
    -			;; |d| <= |c|, so we can use robust-subinternal to
    
    757
    -			;; perform the division.
    
    758
    -			(multiple-value-bind (a b c d)
    
    759
    -			    (maybe-scale (abs c) a b c d)
    
    760
    -			  (multiple-value-bind (e f)
    
    761
    -			      (robust-subinternal a b c d)
    
    762
    -			    (complex e f))))
    
    763
    -		       (t
    
    764
    -			;; |d| > |c|.  So, instead compute
    
    765
    -			;;
    
    766
    -			;;   (b + i*a)/(d + i*c) = ((b*d+a*c) + (a*d-b*c)*i)/(d^2+c^2)
    
    767
    -			;;
    
    768
    -			;; Compare this to (a+i*b)/(c+i*d) and we see that
    
    769
    -			;; realpart of the former is the same, but the
    
    770
    -			;; imagpart of the former is the negative of the
    
    771
    -			;; desired division.
    
    772
    -			(multiple-value-bind (a b c d)
    
    773
    -			    (maybe-scale (abs d) a b c d)
    
    774
    -			  (multiple-value-bind (e f)
    
    775
    -			      (robust-subinternal b a d c)
    
    776
    -			    (complex e (- f)))))))))
    
    777
    -	      (let* ((xabs (max (abs xr) (abs xi)))
    
    778
    -		     (yabs (max (abs yr) (abs yi)))
    
    779
    -		     (s (coerce 1 ',type)))
    
    780
    -		(declare (,type s))
    
    781
    -		;; If xr or xi is big, scale down xr and xi.
    
    782
    -		(when (>= xabs ,+rbig+)
    
    783
    -		  (setf xr (/ xr 2)
    
    784
    -			xi (/ xi 2)
    
    785
    -			s (* s 2)))
    
    786
    -		;; If yr or yi is big, scale down yr and yi.
    
    787
    -		(when (>= yabs ,+rbig+)
    
    788
    -		  (setf yr (/ yr 2)
    
    789
    -			yi (/ yi 2)
    
    790
    -			s (/ s 2)))
    
    791
    -		;; If xr or xi is tiny, scale up xr and xi.
    
    792
    -		(when (<= xabs (* ,+rmin+ ,+2/eps+))
    
    793
    -		  (setf xr (* xr ,+be+)
    
    794
    -			xi (* xi ,+be+)
    
    795
    -			s (/ s ,+be+)))
    
    796
    -		;; If yr or yi is tiny, scale up yr and yi.
    
    797
    -		(when (<= yabs (* ,+rmin+ ,+2/eps+))
    
    798
    -		  (setf yr (* yr ,+be+)
    
    799
    -			yi (* yi ,+be+)
    
    800
    -			s (* s ,+be+)))
    
    801
    -		(* s
    
    802
    -		   (robust-internal xr xi yr yi))))))))
    
    803
    -  (frob double-float)
    
    804
    -  #+double-double
    
    805
    -  (frob double-double-float))
    
    659
    +(eval-when (:compile-toplevel)
    
    660
    +(defmacro define-cdiv (type)
    
    661
    +  (let* ((dd (if (eq type 'double-float)
    
    662
    +		 ""
    
    663
    +		 "DD-"))
    
    664
    +	 (opt '((optimize (speed 3) (safety 0))))
    
    665
    +	 (name (symbolicate "CDIV-" type))
    
    666
    +	 (compreal (symbolicate "CDIV-" type "-INTERNAL-COMPREAL"))
    
    667
    +	 (subinternal (symbolicate "CDIV-" type "-ROBUST-SUBINTERNAL"))
    
    668
    +	 (internal (symbolicate "CDIV-" type "-ROBUST-INTERNAL"))
    
    669
    +	 (docstring (let ((*print-case* :downcase))
    
    670
    +		      (format nil "Accurate division of complex ~A numbers x and y."
    
    671
    +			      type)))
    
    672
    +	 ;; Create the correct symbols for the constants we need,
    
    673
    +	 ;; as defined above.
    
    674
    +	 (+rmin+ (symbolicate "+CDIV-" dd "RMIN+"))
    
    675
    +	 (+rbig+ (symbolicate "+CDIV-" dd "RBIG+"))
    
    676
    +	 (+rmin2+ (symbolicate "+CDIV-" dd "RMIN2+"))
    
    677
    +	 (+rminscal+ (symbolicate "+CDIV-" dd "RMINSCAL+"))
    
    678
    +	 (+rmax2+ (symbolicate "+CDIV-" dd "RMAX2+"))
    
    679
    +	 (+be+ (symbolicate "+CDIV-" dd "BE+"))
    
    680
    +	 (+2/eps+ (symbolicate "+CDIV-" dd "2/EPS+")))
    
    681
    +    `(progn
    
    682
    +       (defun ,compreal (a b c d r tt)
    
    683
    +	 (declare (,type a b c d r tt)
    
    684
    +		  ,@opt)
    
    685
    +	 ;; Computes the real part of the complex division
    
    686
    +	 ;; (a+ib)/(c+id), assuming |d| <= |c|.  Then let r = d/c and
    
    687
    +	 ;; tt = (c+d*r).
    
    688
    +	 ;;
    
    689
    +	 ;; The realpart is (a*c+b*d)/(c^2+d^2).
    
    690
    +	 ;;
    
    691
    +	 ;; The denominator is
    
    692
    +	 ;;
    
    693
    +	 ;;   c^2+d^2 = c*(c+d*(d/c)) = c*(c+d*r)
    
    694
    +	 ;;
    
    695
    +	 ;; Then
    
    696
    +	 ;;
    
    697
    +	 ;;   (a*c+b*d)/(c^2+d^2) = (a*c+b*d)/(c*(c+d*r))
    
    698
    +	 ;;                       = (a + b*d/c)/(c+d*r)
    
    699
    +	 ;;                       = (a + b*r)/(c + d*r).
    
    700
    +	 ;;
    
    701
    +	 ;; Thus tt = (c + d*r).
    
    702
    +	 (cond ((>= (abs r) ,+rmin+)
    
    703
    +		(let ((br (* b r)))
    
    704
    +		  (if (/= br 0)
    
    705
    +		      (/ (+ a br) tt)
    
    706
    +		      ;; b*r underflows.  Instead, compute
    
    707
    +		      ;;
    
    708
    +		      ;; (a + b*r)/tt = a/tt + b/tt*r
    
    709
    +		      (+ (/ a tt)
    
    710
    +			 (* (/ b tt)
    
    711
    +			    r)))))
    
    712
    +	       (t
    
    713
    +		;; r is very small so d is very tiny compared to c.
    
    714
    +		;;
    
    715
    +		(/ (+ a (* d (/ b c)))
    
    716
    +		   tt))))
    
    717
    +       (defun ,subinternal (a b c d)
    
    718
    +	 (declare (,type a b c d)
    
    719
    +		  ,@opt)
    
    720
    +	 (let* ((r (/ d c))
    
    721
    +		(tt (+ c (* d r))))
    
    722
    +	   ;; e is the real part and f is the imaginary part.  We
    
    723
    +	   ;; can use internal-compreal for the imaginary part by
    
    724
    +	   ;; noticing that the imaginary part of (a+i*b)/(c+i*d) is
    
    725
    +	   ;; the same as the real part of (b-i*a)/(c+i*d).
    
    726
    +	   (let ((e (,compreal a b c d r tt))
    
    727
    +		 (f (,compreal b (- a) c d r tt)))
    
    728
    +	     (values e f))))
    
    729
    +       (defun ,internal (a b c d)
    
    730
    +	 (declare (,type a b c d)
    
    731
    +		  ,@opt)
    
    732
    +	 (let ()
    
    733
    +	   (flet ((maybe-scale (abs-tst a b c d)
    
    734
    +		    (declare (,type a b c d))
    
    735
    +		    ;; This implements McGehearty's iteration 3 to
    
    736
    +		    ;; handle the case when some values are too big
    
    737
    +		    ;; and should be scaled down.  Also if some
    
    738
    +		    ;; values are too tiny, scale them up.
    
    739
    +		    (let ((abs-a (abs a))
    
    740
    +			  (abs-b (abs b)))
    
    741
    +		      (if (or (> abs-tst ,+rbig+)
    
    742
    +			      (> abs-a ,+rbig+)
    
    743
    +			      (> abs-b ,+rbig+))
    
    744
    +			  (setf a (* a 0.5d0)
    
    745
    +				b (* b 0.5d0)
    
    746
    +				c (* c 0.5d0)
    
    747
    +				d (* d 0.5d0))
    
    748
    +			  (if (< abs-tst ,+rmin2+)
    
    749
    +			      (setf a (* a ,+rminscal+)
    
    750
    +				    b (* b ,+rminscal+)
    
    751
    +				    c (* c ,+rminscal+)
    
    752
    +				    d (* d ,+rminscal+))
    
    753
    +			      (if (or (and (< abs-a ,+rmin+)
    
    754
    +					   (< abs-b ,+rmax2+)
    
    755
    +					   (< abs-tst ,+rmax2+))
    
    756
    +				      (and (< abs-b ,+rmin+)
    
    757
    +					   (< abs-a ,+rmax2+)
    
    758
    +					   (< abs-tst ,+rmax2+)))
    
    759
    +				  (setf a (* a ,+rminscal+)
    
    760
    +					b (* b ,+rminscal+)
    
    761
    +					c (* c ,+rminscal+)
    
    762
    +					d (* d ,+rminscal+)))))
    
    763
    +		      (values a b c d))))
    
    764
    +	     (cond
    
    765
    +	       ((<= (abs d) (abs c))
    
    766
    +		;; |d| <= |c|, so we can use robust-subinternal to
    
    767
    +		;; perform the division.
    
    768
    +		(multiple-value-bind (a b c d)
    
    769
    +		    (maybe-scale (abs c) a b c d)
    
    770
    +		  (,subinternal a b c d)))
    
    771
    +	       (t
    
    772
    +		;; |d| > |c|.  So, instead compute
    
    773
    +		;;
    
    774
    +		;;   (b + i*a)/(d + i*c) = ((b*d+a*c) + (a*d-b*c)*i)/(d^2+c^2)
    
    775
    +		;;
    
    776
    +		;; Compare this to (a+i*b)/(c+i*d) and we see that
    
    777
    +		;; realpart of the former is the same, but the
    
    778
    +		;; imagpart of the former is the negative of the
    
    779
    +		;; desired division.
    
    780
    +		(multiple-value-bind (a b c d)
    
    781
    +		    (maybe-scale (abs d) a b c d)
    
    782
    +		  (multiple-value-bind (e f)
    
    783
    +		      (,subinternal b a d c)
    
    784
    +		    (values e (- f)))))))))
    
    785
    +       (defun ,name (a b c d)
    
    786
    +	 ,docstring
    
    787
    +	 (declare (,type a b c d)
    
    788
    +		  ,@opt)
    
    789
    +	 (let* ((ab (max (abs a) (abs b)))
    
    790
    +		(cd (max (abs c) (abs d)))
    
    791
    +		;; S is always multipled by a power of 2.  It's either
    
    792
    +		;; 2, 0.5, or +be+, which is also a power of two.
    
    793
    +		;; (Either 2^105 or 2^209).  Hence, we can just use a
    
    794
    +		;; double-float instead of a double-double-float
    
    795
    +		;; because the double-double always has 0d0 for the lo
    
    796
    +		;; part.
    
    797
    +		(s 1d0))
    
    798
    +	   (declare (double-float s))
    
    799
    +	   ;; If a or b is big, scale down a and b.
    
    800
    +	   (when (>= ab ,+rbig+)
    
    801
    +	     (setf a (* a 0.5d0)
    
    802
    +		   b (* b 0.5d0)
    
    803
    +		   s (* s 2)))
    
    804
    +	   ;; If c or d is big, scale down c and d.
    
    805
    +	   (when (>= cd ,+rbig+)
    
    806
    +	     (setf c (* c 0.5d0)
    
    807
    +		   d (* d 0.5d0)
    
    808
    +		   s (* s 0.5d0)))
    
    809
    +	   ;; If a or b is tiny, scale up a and b.
    
    810
    +	   (when (<= ab (* ,+rmin+ ,+2/eps+))
    
    811
    +	     (setf a (* a ,+be+)
    
    812
    +		   b (* b ,+be+)
    
    813
    +		   s (/ s ,+be+)))
    
    814
    +	   ;; If c or d is tiny, scale up c and d.
    
    815
    +	   (when (<= cd (* ,+rmin+ ,+2/eps+))
    
    816
    +	     (setf c (* c ,+be+)
    
    817
    +		   d (* d ,+be+)
    
    818
    +		   s (* s ,+be+)))
    
    819
    +	   (multiple-value-bind (e f)
    
    820
    +	       (,internal a b c d)
    
    821
    +	     (complex (* s e)
    
    822
    +		      (* s f))))))))
    
    823
    +)
    
    824
    +
    
    825
    +(define-cdiv double-float)
    
    826
    +(define-cdiv double-double-float)
    
    806 827
     
    
    807 828
     ;; Smith's algorithm for complex division for (complex single-float).
    
    808 829
     ;; We convert the parts to double-floats before computing the result.