Raymond Toy pushed to branch issue-459-more-accurate-dd-complex-div at cmucl / cmucl

Commits:

1 changed file:

Changes:

  • src/code/numbers.lisp
    ... ... @@ -602,53 +602,11 @@
    602 602
     ;; In particular iteration 1 and 3 are added.  Iteration 2 and 4 were
    
    603 603
     ;; not added.  The test examples from iteration 2 and 4 didn't change
    
    604 604
     ;; with or without changes added.
    
    605
    -(defconstant +cdiv-rmin+ least-positive-normalized-double-float)
    
    606
    -(defconstant +cdiv-rbig+ (/ most-positive-double-float 2))
    
    607
    -(defconstant +cdiv-rmin2+ (scale-float 1d0 -53))
    
    608
    -(defconstant +cdiv-rminscal+ (scale-float 1d0 51))
    
    609
    -(defconstant +cdiv-rmax2+ (* +cdiv-rbig+ +cdiv-rmin2+))
    
    610
    -;; This is the value of %eps from Scilab
    
    611
    -(defconstant +cdiv-eps+ (scale-float 1d0 -52))
    
    612
    -(defconstant +cdiv-be+ (/ 2 (* +cdiv-eps+ +cdiv-eps+)))
    
    613
    -(defconstant +cdiv-2/eps+ (/ 2 +cdiv-eps+))
    
    614
    -
    
    615
    -;; Same constants but for DOUBLE-DOUBLE-FLOATS.  Some of these aren't
    
    616
    -;; well-defined for DOUBLE-DOUBLE-FLOATS so we make our best guess at
    
    617
    -;; what they might be.  Since double-doubles have about twice as many
    
    618
    -;; bits of precision as a DOUBLE-FLOAT, we generally just double the
    
    619
    -;; exponent (for SCALE-FLOAT) of the corresponding DOUBLE-FLOAT values
    
    620
    -;; above.
    
    621 605
     ;;
    
    622
    -;; Note also that both LEAST-POSITIVE-NORMALIZED-DOUBLE-DOUBLE-FLOAT
    
    623
    -;; and MOST-POSITIVE-DOUBLE-DOUBLE-FLOAT have a low component of 0, so
    
    624
    -;; there's no loss of precision if we use the corresponding
    
    625
    -;; DOUBLE-FLOAT values.  Likewise, all the other constants here are
    
    626
    -;; powers of 2, so the DOUBLE-DOUBLE-FLOAT values can be represented
    
    627
    -;; exactly as a DOUBLE-FLOAT.  We can use DOUBLE-FLOAT values.
    
    628
    -(defconstant +cdiv-dd-rmin+
    
    629
    -  least-positive-normalized-double-float)
    
    630
    -(defconstant +cdiv-dd-rbig+
    
    631
    -  (/ most-positive-double-float 2))
    
    632
    -(defconstant +cdiv-dd-rmin2+
    
    633
    -  (scale-float 1d0 -106))
    
    634
    -(defconstant +cdiv-dd-rminscal+
    
    635
    -  (scale-float 1d0 102))
    
    636
    -(defconstant +cdiv-dd-rmax2+
    
    637
    -  (* +cdiv-dd-rbig+ +cdiv-dd-rmin2+))
    
    638
    -;; Epsilon for double-doubles isn't really well-defined because things
    
    639
    -;; like (+ 1w0 1w-200) is a valid double-double float.
    
    640
    -(defconstant +cdiv-dd-eps+
    
    641
    -  (scale-float 1d0 -104))
    
    642
    -(defconstant +cdiv-dd-be+
    
    643
    -  (/ 2 (* +cdiv-dd-eps+ +cdiv-dd-eps+)))
    
    644
    -(defconstant +cdiv-dd-2/eps+
    
    645
    -  (/ 2 +cdiv-dd-eps+))
    
    646
    -
    
    647
    -
    
    648
    -;; Make these functions accessible.  cdiv-double-float and
    
    649
    -;; cdiv-single-float are used by deftransforms.  Of course, two-arg-/
    
    650
    -;; is the interface to division.  cdiv-generic isn't used anywhere
    
    651
    -;; else.
    
    606
    +;; Make these functions accessible outside the block compilation.
    
    607
    +;; CDIV-DOUBLE-FLOAT and CDIV-SINGLE-FLOAT are used by deftransforms.
    
    608
    +;; Of course, TWO-ARG-/ is the interface to division.  CDIV-GENERIC
    
    609
    +;; isn't used anywhere else.
    
    652 610
     (declaim (ext:start-block cdiv-double-float cdiv-single-float
    
    653 611
     			  cdiv-double-double-float
    
    654 612
     			  two-arg-/))
    
    ... ... @@ -657,27 +615,15 @@
    657 615
     ;; implementation is basically identical except for the constants.
    
    658 616
     ;; use a macro to generate both versions.
    
    659 617
     (eval-when (:compile-toplevel)
    
    660
    -(defmacro define-cdiv (type)
    
    661
    -  (let* ((dd (if (eq type 'double-float)
    
    662
    -		 ""
    
    663
    -		 "DD-"))
    
    664
    -	 (opt '((optimize (speed 3) (safety 0))))
    
    618
    +(defmacro define-cdiv (type &key rmin rbig rmin2 rminscal eps)
    
    619
    +  (let* ((opt '((optimize (speed 3) (safety 0))))
    
    665 620
     	 (name (symbolicate "CDIV-" type))
    
    666 621
     	 (compreal (symbolicate "CDIV-" type "-INTERNAL-COMPREAL"))
    
    667 622
     	 (subinternal (symbolicate "CDIV-" type "-ROBUST-SUBINTERNAL"))
    
    668 623
     	 (internal (symbolicate "CDIV-" type "-ROBUST-INTERNAL"))
    
    669 624
     	 (docstring (let ((*print-case* :downcase))
    
    670 625
     		      (format nil "Accurate division of complex ~A numbers x and y."
    
    671
    -			      type)))
    
    672
    -	 ;; Create the correct symbols for the constants we need,
    
    673
    -	 ;; as defined above.
    
    674
    -	 (+rmin+ (symbolicate "+CDIV-" dd "RMIN+"))
    
    675
    -	 (+rbig+ (symbolicate "+CDIV-" dd "RBIG+"))
    
    676
    -	 (+rmin2+ (symbolicate "+CDIV-" dd "RMIN2+"))
    
    677
    -	 (+rminscal+ (symbolicate "+CDIV-" dd "RMINSCAL+"))
    
    678
    -	 (+rmax2+ (symbolicate "+CDIV-" dd "RMAX2+"))
    
    679
    -	 (+be+ (symbolicate "+CDIV-" dd "BE+"))
    
    680
    -	 (+2/eps+ (symbolicate "+CDIV-" dd "2/EPS+")))
    
    626
    +			      type))))
    
    681 627
         `(progn
    
    682 628
            (defun ,compreal (a b c d r tt)
    
    683 629
     	 (declare (,type a b c d r tt)
    
    ... ... @@ -699,7 +645,7 @@
    699 645
     	 ;;                       = (a + b*r)/(c + d*r).
    
    700 646
     	 ;;
    
    701 647
     	 ;; Thus tt = (c + d*r).
    
    702
    -	 (cond ((>= (abs r) ,+rmin+)
    
    648
    +	 (cond ((>= (abs r) ,rmin)
    
    703 649
     		(let ((br (* b r)))
    
    704 650
     		  (if (/= br 0)
    
    705 651
     		      (/ (+ a br) tt)
    
    ... ... @@ -719,115 +665,154 @@
    719 665
     		  ,@opt)
    
    720 666
     	 (let* ((r (/ d c))
    
    721 667
     		(tt (+ c (* d r))))
    
    722
    -	   ;; e is the real part and f is the imaginary part.  We
    
    723
    -	   ;; can use internal-compreal for the imaginary part by
    
    724
    -	   ;; noticing that the imaginary part of (a+i*b)/(c+i*d) is
    
    725
    -	   ;; the same as the real part of (b-i*a)/(c+i*d).
    
    668
    +	   ;; e is the real part and f is the imaginary part.  We can
    
    669
    +	   ;; use COMPREAL for the imaginary part by noticing that the
    
    670
    +	   ;; imaginary part of (a+i*b)/(c+i*d) is the same as the
    
    671
    +	   ;; real part of (b-i*a)/(c+i*d).
    
    726 672
     	   (let ((e (,compreal a b c d r tt))
    
    727 673
     		 (f (,compreal b (- a) c d r tt)))
    
    728 674
     	     (values e f))))
    
    729 675
            (defun ,internal (x y)
    
    730
    -	 (declare (type (complex ,type) x y)
    
    731
    -		  ,@opt)
    
    732
    -	 (let ((a (realpart x))
    
    733
    -	       (b (imagpart x))
    
    734
    -	       (c (realpart y))
    
    735
    -	       (d (imagpart y)))
    
    736
    -	   (declare (,type a b c d))
    
    737
    -	   (flet ((maybe-scale (abs-tst a b c d)
    
    738
    -		    (declare (,type a b c d))
    
    739
    -		    ;; This implements McGehearty's iteration 3 to
    
    740
    -		    ;; handle the case when some values are too big
    
    741
    -		    ;; and should be scaled down.  Also if some
    
    742
    -		    ;; values are too tiny, scale them up.
    
    743
    -		    (let ((abs-a (abs a))
    
    744
    -			  (abs-b (abs b)))
    
    745
    -		      (if (or (> abs-tst ,+rbig+)
    
    746
    -			      (> abs-a ,+rbig+)
    
    747
    -			      (> abs-b ,+rbig+))
    
    748
    -			  (setf a (* a 0.5d0)
    
    749
    -				b (* b 0.5d0)
    
    750
    -				c (* c 0.5d0)
    
    751
    -				d (* d 0.5d0))
    
    752
    -			  (if (< abs-tst ,+rmin2+)
    
    753
    -			      (setf a (* a ,+rminscal+)
    
    754
    -				    b (* b ,+rminscal+)
    
    755
    -				    c (* c ,+rminscal+)
    
    756
    -				    d (* d ,+rminscal+))
    
    757
    -			      (if (or (and (< abs-a ,+rmin+)
    
    758
    -					   (< abs-b ,+rmax2+)
    
    759
    -					   (< abs-tst ,+rmax2+))
    
    760
    -				      (and (< abs-b ,+rmin+)
    
    761
    -					   (< abs-a ,+rmax2+)
    
    762
    -					   (< abs-tst ,+rmax2+)))
    
    763
    -				  (setf a (* a ,+rminscal+)
    
    764
    -					b (* b ,+rminscal+)
    
    765
    -					c (* c ,+rminscal+)
    
    766
    -					d (* d ,+rminscal+)))))
    
    767
    -		      (values a b c d))))
    
    768
    -	     (cond
    
    769
    -	       ((<= (abs d) (abs c))
    
    770
    -		;; |d| <= |c|, so we can use robust-subinternal to
    
    771
    -		;; perform the division.
    
    772
    -		(multiple-value-bind (a b c d)
    
    773
    -		    (maybe-scale (abs c) a b c d)
    
    774
    -		  (,subinternal a b c d)))
    
    775
    -	       (t
    
    776
    -		;; |d| > |c|.  So, instead compute
    
    777
    -		;;
    
    778
    -		;;   (b + i*a)/(d + i*c) = ((b*d+a*c) + (a*d-b*c)*i)/(d^2+c^2)
    
    779
    -		;;
    
    780
    -		;; Compare this to (a+i*b)/(c+i*d) and we see that
    
    781
    -		;; realpart of the former is the same, but the
    
    782
    -		;; imagpart of the former is the negative of the
    
    783
    -		;; desired division.
    
    784
    -		(multiple-value-bind (a b c d)
    
    785
    -		    (maybe-scale (abs d) a b c d)
    
    786
    -		  (multiple-value-bind (e f)
    
    787
    -		      (,subinternal b a d c)
    
    788
    -		    (values e (- f)))))))))
    
    789
    -       (defun ,name (x y)
    
    790
    -	 ,docstring
    
    791
    -	 (declare (type (complex ,type) x y)
    
    792
    -		  ,@opt)
    
    793
    -	 (let* ((a (realpart x))
    
    794
    -		(b (imagpart x))
    
    795
    -		(c (realpart y))
    
    796
    -		(d (imagpart y))
    
    797
    -		(ab (max (abs a) (abs b)))
    
    798
    -		(cd (max (abs c) (abs d)))
    
    799
    -		;; S is always multipled by a power of 2.  It's either
    
    800
    -		;; 2, 0.5, or +be+, which is also a power of two.
    
    801
    -		;; (Either 2^105 or 2^209).  Hence, we can just use a
    
    802
    -		;; double-float instead of a double-double-float
    
    803
    -		;; because the double-double always has 0d0 for the lo
    
    804
    -		;; part.
    
    805
    -		(s 1d0))
    
    806
    -	   (declare (double-float s))
    
    807
    -	   ;; If a or b is big, scale down a and b.
    
    808
    -	   (when (>= ab ,+rbig+)
    
    809
    -	     (setf x (* x 0.5d0)
    
    810
    -		   s (* s 2)))
    
    811
    -	   ;; If c or d is big, scale down c and d.
    
    812
    -	   (when (>= cd ,+rbig+)
    
    813
    -	     (setf y (* y 0.5d0)
    
    814
    -		   s (* s 0.5d0)))
    
    815
    -	   ;; If a or b is tiny, scale up a and b.
    
    816
    -	   (when (<= ab (* ,+rmin+ ,+2/eps+))
    
    817
    -	     (setf x (* x ,+be+)
    
    818
    -		   s (/ s ,+be+)))
    
    819
    -	   ;; If c or d is tiny, scale up c and d.
    
    820
    -	   (when (<= cd (* ,+rmin+ ,+2/eps+))
    
    821
    -	     (setf y (* y ,+be+)
    
    822
    -		   s (* s ,+be+)))
    
    823
    -	   (multiple-value-bind (e f)
    
    824
    -	       (,internal x y)
    
    825
    -	     (complex (* s e)
    
    826
    -		      (* s f))))))))
    
    676
    +	   (declare (type (complex ,type) x y)
    
    677
    +		    ,@opt)
    
    678
    +	   (let ((a (realpart x))
    
    679
    +		 (b (imagpart x))
    
    680
    +		 (c (realpart y))
    
    681
    +		 (d (imagpart y)))
    
    682
    +	     (declare (,type a b c d))
    
    683
    +	     (flet ((maybe-scale (abs-tst a b c d)
    
    684
    +		      (declare (,type a b c d))
    
    685
    +		      ;; This implements McGehearty's iteration 3 to
    
    686
    +		      ;; handle the case when some values are too big
    
    687
    +		      ;; and should be scaled down.  Also if some
    
    688
    +		      ;; values are too tiny, scale them up.
    
    689
    +		      (let ((+rbig+ ,rbig)
    
    690
    +			    (+rmin2+ ,rmin2)
    
    691
    +			    (+rminscal+ ,rminscal)
    
    692
    +			    (+rmax2+ (* ,rbig ,rmin2))
    
    693
    +			    (+rmin+ ,rmin)
    
    694
    +			    (abs-a (abs a))
    
    695
    +			    (abs-b (abs b)))
    
    696
    +			(if (or (> abs-tst +rbig+)
    
    697
    +				(> abs-a +rbig+)
    
    698
    +				(> abs-b +rbig+))
    
    699
    +			    (setf a (* a 0.5d0)
    
    700
    +				  b (* b 0.5d0)
    
    701
    +				  c (* c 0.5d0)
    
    702
    +				  d (* d 0.5d0))
    
    703
    +			    (if (< abs-tst +rmin2+)
    
    704
    +				(setf a (* a +rminscal+)
    
    705
    +				      b (* b +rminscal+)
    
    706
    +				      c (* c +rminscal+)
    
    707
    +				      d (* d +rminscal+))
    
    708
    +				(if (or (and (< abs-a +rmin+)
    
    709
    +					     (< abs-b +rmax2+)
    
    710
    +					     (< abs-tst +rmax2+))
    
    711
    +					(and (< abs-b +rmin+)
    
    712
    +					     (< abs-a +rmax2+)
    
    713
    +					     (< abs-tst +rmax2+)))
    
    714
    +				    (setf a (* a +rminscal+)
    
    715
    +					  b (* b +rminscal+)
    
    716
    +					  c (* c +rminscal+)
    
    717
    +					  d (* d +rminscal+)))))
    
    718
    +			(values a b c d))))
    
    719
    +	       (cond
    
    720
    +		 ((<= (abs d) (abs c))
    
    721
    +		  ;; |d| <= |c|, so we can use robust-subinternal to
    
    722
    +		  ;; perform the division.
    
    723
    +		  (multiple-value-bind (a b c d)
    
    724
    +		      (maybe-scale (abs c) a b c d)
    
    725
    +		    (,subinternal a b c d)))
    
    726
    +		 (t
    
    727
    +		  ;; |d| > |c|.  So, instead compute
    
    728
    +		  ;;
    
    729
    +		  ;;   (b + i*a)/(d + i*c) = ((b*d+a*c) + (a*d-b*c)*i)/(d^2+c^2)
    
    730
    +		  ;;
    
    731
    +		  ;; Compare this to (a+i*b)/(c+i*d) and we see that
    
    732
    +		  ;; realpart of the former is the same, but the
    
    733
    +		  ;; imagpart of the former is the negative of the
    
    734
    +		  ;; desired division.
    
    735
    +		  (multiple-value-bind (a b c d)
    
    736
    +		      (maybe-scale (abs d) a b c d)
    
    737
    +		    (multiple-value-bind (e f)
    
    738
    +			(,subinternal b a d c)
    
    739
    +		      (values e (- f)))))))))
    
    740
    +	 (defun ,name (x y)
    
    741
    +	   ,docstring
    
    742
    +	   (declare (type (complex ,type) x y)
    
    743
    +		    ,@opt)
    
    744
    +	   (let ((+rmin+ ,rmin)
    
    745
    +		 (+rbig+ ,rbig)
    
    746
    +		 (+2/eps+ (/ 2 ,eps))
    
    747
    +		 (+be+ (/ 2 (* ,eps ,eps))))
    
    748
    +	     (let* ((a (realpart x))
    
    749
    +		    (b (imagpart x))
    
    750
    +		    (c (realpart y))
    
    751
    +		    (d (imagpart y))
    
    752
    +		    (ab (max (abs a) (abs b)))
    
    753
    +		    (cd (max (abs c) (abs d)))
    
    754
    +		    ;; S is always multipled by a power of 2.  It's either
    
    755
    +		    ;; 2, 0.5, or +be+, which is also a power of two.
    
    756
    +		    ;; (Either 2^105 or 2^209).  Hence, we can just use a
    
    757
    +		    ;; double-float instead of a double-double-float
    
    758
    +		    ;; because the double-double always has 0d0 for the lo
    
    759
    +		    ;; part.
    
    760
    +		    (s 1d0))
    
    761
    +	       (declare (double-float s))
    
    762
    +	       ;; If a or b is big, scale down a and b.
    
    763
    +	       (when (>= ab +rbig+)
    
    764
    +		 (setf x (* x 0.5d0)
    
    765
    +		       s (* s 2)))
    
    766
    +	       ;; If c or d is big, scale down c and d.
    
    767
    +	       (when (>= cd +rbig+)
    
    768
    +		 (setf y (* y 0.5d0)
    
    769
    +		       s (* s 0.5d0)))
    
    770
    +	       ;; If a or b is tiny, scale up a and b.
    
    771
    +	       (when (<= ab (* +rmin+ +2/eps+))
    
    772
    +		 (setf x (* x +be+)
    
    773
    +		       s (/ s +be+)))
    
    774
    +	       ;; If c or d is tiny, scale up c and d.
    
    775
    +	       (when (<= cd (* +rmin+ +2/eps+))
    
    776
    +		 (setf y (* y +be+)
    
    777
    +		       s (* s +be+)))
    
    778
    +	       (multiple-value-bind (e f)
    
    779
    +		   (,internal x y)
    
    780
    +		 (complex (* s e)
    
    781
    +			  (* s f)))))))))
    
    827 782
     )
    
    828 783
     
    
    829
    -(define-cdiv double-float)
    
    830
    -(define-cdiv double-double-float)
    
    784
    +(define-cdiv double-float
    
    785
    +  :rmin least-positive-normalized-double-float
    
    786
    +  :rbig (/ most-positive-double-float 2)
    
    787
    +  :rmin2 (scale-float 1d0 -53)
    
    788
    +  :rminscal (scale-float 1d0 51)
    
    789
    +  ;; This is the value of %eps from Scilab
    
    790
    +  :eps (scale-float 1d0 -52))
    
    791
    +
    
    792
    +;; Same constants but for DOUBLE-DOUBLE-FLOATS.  Some of these aren't
    
    793
    +;; well-defined for DOUBLE-DOUBLE-FLOATS (like eps) so we make our
    
    794
    +;; best guess at what they might be.  Since double-doubles have
    
    795
    +;; twice as many bits of precision as a DOUBLE-FLOAT, we generally
    
    796
    +;; just double the exponent (for SCALE-FLOAT) of the corresponding
    
    797
    +;; DOUBLE-FLOAT values above.
    
    798
    +;;
    
    799
    +;; Note also that since both
    
    800
    +;; LEAST-POSITIVE-NORMALIZED-DOUBLE-DOUBLE-FLOAT and
    
    801
    +;; MOST-POSITIVE-DOUBLE-DOUBLE-FLOAT have a low component of 0,
    
    802
    +;; there's no loss of precision if we use the corresponding
    
    803
    +;; DOUBLE-FLOAT values.  Likewise, all the other constants here are
    
    804
    +;; powers of 2, so the DOUBLE-DOUBLE-FLOAT values can be represented
    
    805
    +;; exactly as a DOUBLE-FLOAT.  We can use DOUBLE-FLOAT values.  This
    
    806
    +;; also makes some of the operations a bit simpler since operations
    
    807
    +;; between a DOUBLE-DOUBLE-FLOAT and a DOUBLE-FLOAT are simpler than
    
    808
    +;; if both were DOUBLE-DOUBLE-FLOATs.
    
    809
    +(define-cdiv double-double-float
    
    810
    +  :rmin least-positive-normalized-double-float
    
    811
    +  :rbig (/ most-positive-double-float 2)
    
    812
    +  :rmin2 (scale-float 1d0 -106)
    
    813
    +  :rminscal (scale-float 1d0 102)
    
    814
    +  :eps (scale-float 1d0 -104))
    
    815
    +
    
    831 816
     
    
    832 817
     ;; Smith's algorithm for complex division for (complex single-float).
    
    833 818
     ;; We convert the parts to double-floats before computing the result.