| ... |
... |
@@ -643,11 +643,18 @@ |
|
643
|
643
|
;; else.
|
|
644
|
644
|
(declaim (ext:start-block cdiv-double-float cdiv-single-float
|
|
645
|
645
|
cdiv-double-double-float
|
|
646
|
|
- two-arg-/))
|
|
|
646
|
+ two-arg-/
|
|
|
647
|
+ cdiv-double-float-internal-compreal
|
|
|
648
|
+ cdiv-double-float-robust-subinternal
|
|
|
649
|
+ cdiv-double-float-robust-internal
|
|
|
650
|
+ cdiv-double-double-float-internal-compreal
|
|
|
651
|
+ cdiv-double-double-float-robust-subinternal
|
|
|
652
|
+ cdiv-double-double-float-robust-internal))
|
|
647
|
653
|
|
|
648
|
654
|
;; The code for both the double-float and double-double-float
|
|
649
|
655
|
;; implementation is basically identical except for the constants.
|
|
650
|
656
|
;; use a macro to generate both versions.
|
|
|
657
|
+#+nil
|
|
651
|
658
|
(macrolet
|
|
652
|
659
|
((frob (type)
|
|
653
|
660
|
(let* ((dd (if (eq type 'double-float)
|
| ... |
... |
@@ -809,6 +816,180 @@ |
|
809
|
816
|
#+double-double
|
|
810
|
817
|
(frob double-double-float))
|
|
811
|
818
|
|
|
|
819
|
+(eval-when (:compile-toplevel)
|
|
|
820
|
+(defmacro define-cdiv (type)
|
|
|
821
|
+ (let* ((dd (if (eq type 'double-float)
|
|
|
822
|
+ ""
|
|
|
823
|
+ "DD-"))
|
|
|
824
|
+ (opt '((optimize (speed 3) (safety 0))))
|
|
|
825
|
+ (name (symbolicate "CDIV-" type))
|
|
|
826
|
+ (compreal (symbolicate "CDIV-" type "-INTERNAL-COMPREAL"))
|
|
|
827
|
+ (subinternal (symbolicate "CDIV-" type "-ROBUST-SUBINTERNAL"))
|
|
|
828
|
+ (internal (symbolicate "CDIV-" type "-ROBUST-INTERNAL"))
|
|
|
829
|
+ (docstring (let ((*print-case* :downcase))
|
|
|
830
|
+ (format nil "Accurate division of complex ~A numbers x and y."
|
|
|
831
|
+ type)))
|
|
|
832
|
+ ;; Create the correct symbols for the constants we need,
|
|
|
833
|
+ ;; as defined above.
|
|
|
834
|
+ (+rmin+ (symbolicate "+CDIV-" dd "RMIN+"))
|
|
|
835
|
+ (+rbig+ (symbolicate "+CDIV-" dd "RBIG+"))
|
|
|
836
|
+ (+rmin2+ (symbolicate "+CDIV-" dd "RMIN2+"))
|
|
|
837
|
+ (+rminscal+ (symbolicate "+CDIV-" dd "RMINSCAL+"))
|
|
|
838
|
+ (+rmax2+ (symbolicate "+CDIV-" dd "RMAX2+"))
|
|
|
839
|
+ (+be+ (symbolicate "+CDIV-" dd "BE+"))
|
|
|
840
|
+ (+2/eps+ (symbolicate "+CDIV-" dd "2/EPS+")))
|
|
|
841
|
+ `(progn
|
|
|
842
|
+ (defun ,compreal (a b c d r tt)
|
|
|
843
|
+ (declare (,type a b c d r tt)
|
|
|
844
|
+ ,@opt)
|
|
|
845
|
+ ;; Compute the real part of the complex division
|
|
|
846
|
+ ;; (a+ib)/(c+id), assuming |c| <= |d|. r = d/c and tt = 1/(c+d*r).
|
|
|
847
|
+ ;;
|
|
|
848
|
+ ;; The realpart is (a*c+b*d)/(c^2+d^2).
|
|
|
849
|
+ ;;
|
|
|
850
|
+ ;; c^2+d^2 = c*(c+d*(d/c)) = c*(c+d*r)
|
|
|
851
|
+ ;;
|
|
|
852
|
+ ;; Then
|
|
|
853
|
+ ;;
|
|
|
854
|
+ ;; (a*c+b*d)/(c^2+d^2) = (a*c+b*d)/(c*(c+d*r))
|
|
|
855
|
+ ;; = (a + b*d/c)/(c+d*r)
|
|
|
856
|
+ ;; = (a + b*r)/(c + d*r).
|
|
|
857
|
+ ;;
|
|
|
858
|
+ ;; Thus tt = (c + d*r).
|
|
|
859
|
+ (cond ((>= (abs r) ,+rmin+)
|
|
|
860
|
+ (let ((br (* b r)))
|
|
|
861
|
+ (if (/= br 0)
|
|
|
862
|
+ (/ (+ a br) tt)
|
|
|
863
|
+ ;; b*r underflows. Instead, compute
|
|
|
864
|
+ ;;
|
|
|
865
|
+ ;; (a + b*r)*tt = a*tt + b*tt*r
|
|
|
866
|
+ ;; = a*tt + (b*tt)*r
|
|
|
867
|
+ ;; (a + b*r)/tt = a/tt + b/tt*r
|
|
|
868
|
+ ;; = a*tt + (b*tt)*r
|
|
|
869
|
+ (+ (/ a tt)
|
|
|
870
|
+ (* (/ b tt)
|
|
|
871
|
+ r)))))
|
|
|
872
|
+ (t
|
|
|
873
|
+ ;; r = 0 so d is very tiny compared to c.
|
|
|
874
|
+ ;;
|
|
|
875
|
+ (/ (+ a (* d (/ b c)))
|
|
|
876
|
+ tt))))
|
|
|
877
|
+ (defun ,subinternal (a b c d)
|
|
|
878
|
+ (declare (,type a b c d)
|
|
|
879
|
+ ,@opt)
|
|
|
880
|
+ (let* ((r (/ d c))
|
|
|
881
|
+ (tt (+ c (* d r))))
|
|
|
882
|
+ ;; e is the real part and f is the imaginary part. We
|
|
|
883
|
+ ;; can use internal-compreal for the imaginary part by
|
|
|
884
|
+ ;; noticing that the imaginary part of (a+i*b)/(c+i*d) is
|
|
|
885
|
+ ;; the same as the real part of (b-i*a)/(c+i*d).
|
|
|
886
|
+ (let ((e (,compreal a b c d r tt))
|
|
|
887
|
+ (f (,compreal b (- a) c d r tt)))
|
|
|
888
|
+ (values e f))))
|
|
|
889
|
+ (defun ,internal (x y)
|
|
|
890
|
+ (declare (type (complex ,type) x y)
|
|
|
891
|
+ ,@opt)
|
|
|
892
|
+ (let ((a (realpart x))
|
|
|
893
|
+ (b (imagpart x))
|
|
|
894
|
+ (c (realpart y))
|
|
|
895
|
+ (d (imagpart y)))
|
|
|
896
|
+ (declare (,type a b c d))
|
|
|
897
|
+ (flet ((maybe-scale (abs-tst a b c d)
|
|
|
898
|
+ (declare (,type a b c d))
|
|
|
899
|
+ ;; This implements McGehearty's iteration 3 to
|
|
|
900
|
+ ;; handle the case when some values are too big
|
|
|
901
|
+ ;; and should be scaled down. Also if some
|
|
|
902
|
+ ;; values are too tiny, scale them up.
|
|
|
903
|
+ (let ((abs-a (abs a))
|
|
|
904
|
+ (abs-b (abs b)))
|
|
|
905
|
+ (if (or (> abs-tst ,+rbig+)
|
|
|
906
|
+ (> abs-a ,+rbig+)
|
|
|
907
|
+ (> abs-b ,+rbig+))
|
|
|
908
|
+ (setf a (* a 0.5d0)
|
|
|
909
|
+ b (* b 0.5d0)
|
|
|
910
|
+ c (* c 0.5d0)
|
|
|
911
|
+ d (* d 0.5d0))
|
|
|
912
|
+ (if (< abs-tst ,+rmin2+)
|
|
|
913
|
+ (setf a (* a ,+rminscal+)
|
|
|
914
|
+ b (* b ,+rminscal+)
|
|
|
915
|
+ c (* c ,+rminscal+)
|
|
|
916
|
+ d (* d ,+rminscal+))
|
|
|
917
|
+ (if (or (and (< abs-a ,+rmin+)
|
|
|
918
|
+ (< abs-b ,+rmax2+)
|
|
|
919
|
+ (< abs-tst ,+rmax2+))
|
|
|
920
|
+ (and (< abs-b ,+rmin+)
|
|
|
921
|
+ (< abs-a ,+rmax2+)
|
|
|
922
|
+ (< abs-tst ,+rmax2+)))
|
|
|
923
|
+ (setf a (* a ,+rminscal+)
|
|
|
924
|
+ b (* b ,+rminscal+)
|
|
|
925
|
+ c (* c ,+rminscal+)
|
|
|
926
|
+ d (* d ,+rminscal+)))))
|
|
|
927
|
+ (values a b c d))))
|
|
|
928
|
+ (cond
|
|
|
929
|
+ ((<= (abs d) (abs c))
|
|
|
930
|
+ ;; |d| <= |c|, so we can use robust-subinternal to
|
|
|
931
|
+ ;; perform the division.
|
|
|
932
|
+ (multiple-value-bind (a b c d)
|
|
|
933
|
+ (maybe-scale (abs c) a b c d)
|
|
|
934
|
+ (multiple-value-bind (e f)
|
|
|
935
|
+ (,subinternal a b c d)
|
|
|
936
|
+ (complex e f))))
|
|
|
937
|
+ (t
|
|
|
938
|
+ ;; |d| > |c|. So, instead compute
|
|
|
939
|
+ ;;
|
|
|
940
|
+ ;; (b + i*a)/(d + i*c) = ((b*d+a*c) + (a*d-b*c)*i)/(d^2+c^2)
|
|
|
941
|
+ ;;
|
|
|
942
|
+ ;; Compare this to (a+i*b)/(c+i*d) and we see that
|
|
|
943
|
+ ;; realpart of the former is the same, but the
|
|
|
944
|
+ ;; imagpart of the former is the negative of the
|
|
|
945
|
+ ;; desired division.
|
|
|
946
|
+ (multiple-value-bind (a b c d)
|
|
|
947
|
+ (maybe-scale (abs d) a b c d)
|
|
|
948
|
+ (multiple-value-bind (e f)
|
|
|
949
|
+ (,subinternal b a d c)
|
|
|
950
|
+ (complex e (- f)))))))))
|
|
|
951
|
+ (defun ,name (x y)
|
|
|
952
|
+ ,docstring
|
|
|
953
|
+ (declare (type (complex ,type) x y)
|
|
|
954
|
+ ,@opt)
|
|
|
955
|
+ (let* ((a (realpart x))
|
|
|
956
|
+ (b (imagpart x))
|
|
|
957
|
+ (c (realpart y))
|
|
|
958
|
+ (d (imagpart y))
|
|
|
959
|
+ (ab (max (abs a) (abs b)))
|
|
|
960
|
+ (cd (max (abs c) (abs d)))
|
|
|
961
|
+ ;; S is always multipled by a power of 2. It's either
|
|
|
962
|
+ ;; 2, 0.5, or +be+, which is also a power of two.
|
|
|
963
|
+ ;; (Either 2^105 or 2^209). Hence, we can just use a
|
|
|
964
|
+ ;; double-float instead of a double-double-float
|
|
|
965
|
+ ;; because the double-double always has 0d0 for the lo
|
|
|
966
|
+ ;; part.
|
|
|
967
|
+ (s 1d0))
|
|
|
968
|
+ (declare (double-float s))
|
|
|
969
|
+ ;; If a or b is big, scale down a and b.
|
|
|
970
|
+ (when (>= ab ,+rbig+)
|
|
|
971
|
+ (setf x (* x 0.5d0)
|
|
|
972
|
+ s (* s 2)))
|
|
|
973
|
+ ;; If c or d is big, scale down c and d.
|
|
|
974
|
+ (when (>= cd ,+rbig+)
|
|
|
975
|
+ (setf y (* y 0.5d0)
|
|
|
976
|
+ s (* s 0.5d0)))
|
|
|
977
|
+ ;; If a or b is tiny, scale up a and b.
|
|
|
978
|
+ (when (<= ab (* ,+rmin+ ,+2/eps+))
|
|
|
979
|
+ (setf x (* x ,+be+)
|
|
|
980
|
+ s (/ s ,+be+)))
|
|
|
981
|
+ ;; If c or d is tiny, scale up c and d.
|
|
|
982
|
+ (when (<= cd (* ,+rmin+ ,+2/eps+))
|
|
|
983
|
+ (setf y (* y ,+be+)
|
|
|
984
|
+ s (* s ,+be+)))
|
|
|
985
|
+ (* s
|
|
|
986
|
+ (,internal x y)))))))
|
|
|
987
|
+)
|
|
|
988
|
+
|
|
|
989
|
+(define-cdiv double-float)
|
|
|
990
|
+(define-cdiv double-double-float)
|
|
|
991
|
+
|
|
|
992
|
+
|
|
812
|
993
|
;; Smith's algorithm for complex division for (complex single-float).
|
|
813
|
994
|
;; We convert the parts to double-floats before computing the result.
|
|
814
|
995
|
(defun cdiv-single-float (x y)
|