Raymond Toy pushed to branch rtoy-amd64-p1 at cmucl / cmucl
Commits: 67500fc6 by Raymond Toy at 2020-04-06T17:05:02-07:00 Add defvar *num-fixups*
- - - - - 962f709d by Raymond Toy at 2020-04-06T17:05:18-07:00 Use #+double-double
- - - - - 5d19a2f6 by Raymond Toy at 2020-04-06T17:06:20-07:00 Update base-char-reg for unicode
Just porting over the x86 unicode base-char-reg stuff. Base-chars don't fit into byte regs anymore.
- - - - - 6a88e2a0 by Raymond Toy at 2020-04-06T17:06:41-07:00 Use movzx when loading type codes
- - - - - bb504616 by Raymond Toy at 2020-04-06T17:07:52-07:00 Merge x86 char stuff, basically updating for unicode.
Also add _N"" in some places to make the difference between x86 and amd64 less.
- - - - - 4ce27917 by Raymond Toy at 2020-04-06T17:08:51-07:00 Update string vops for unicode.
Mostly just copying the simple-array-unsigned-byte-16 vops to the string vops since strings hold 16-bit chars.
- - - - - b1d28406 by Raymond Toy at 2020-04-06T17:09:16-07:00 Port closure-tramp and undefined-tramp from x86
- - - - - 894977db by Raymond Toy at 2020-04-11T19:25:47-07:00 Import x86/lfoat-sse2.lisp
Modify slightly to replace uses of ebp with rbp and a few other changes to use a 64-bit reg instead of a 32-bit reg.
- - - - -
8 changed files:
- src/assembly/amd64/assem-rtns.lisp - src/compiler/amd64/array.lisp - src/compiler/amd64/char.lisp - + src/compiler/amd64/float-sse2.lisp - src/compiler/amd64/macros.lisp - src/compiler/amd64/vm.lisp - src/compiler/generic/objdef.lisp - src/tools/cross-scripts/cross-x86-amd64.lisp
Changes:
===================================== src/assembly/amd64/assem-rtns.lisp ===================================== @@ -64,7 +64,8 @@ (inst lea rdi (make-ea :qword :base rbx :disp (- word-bytes))) (inst rep) (inst movs :qword) - + (inst cld) + ;; Restore the count. (inst mov rcx rdx)
@@ -159,6 +160,7 @@ (inst sub rsi word-bytes) (inst rep) (inst movs :qword) + (inst cld)
;; Load the register arguments carefully. (loadw rdx rbp-tn -1) @@ -274,3 +276,23 @@
(inst jmp (make-ea :byte :base block :disp (* unwind-block-entry-pc-slot word-bytes)))) + +#+assembler +(define-assembly-routine (closure-tramp + (:return-style :none)) + () + (loadw rax-tn rax-tn fdefn-function-slot other-pointer-type) + (inst jmp (make-ea :qword :base rax-tn + :disp (- (* closure-function-slot word-bytes) + function-pointer-type)))) + +#+assembler +(define-assembly-routine (undefined-tramp + (:return-style :none)) + () + (let ((error (generate-error-code nil undefined-symbol-error + (make-random-tn :kind :normal + :sc (sc-or-lose 'descriptor-reg c::*backend*) + :offset 0)))) + (inst jmp error) + (inst ret)))
===================================== src/compiler/amd64/array.lisp ===================================== @@ -1234,20 +1234,13 @@ (:args (object :scs (descriptor-reg)) (index :scs (unsigned-reg))) (:arg-types simple-string positive-fixnum) - (:temporary (:sc unsigned-reg ; byte-reg - :offset rax-offset ; al-offset - :target value - :from (:eval 0) :to (:result 0)) - rax) - (:ignore rax) (:results (value :scs (base-char-reg))) (:result-types base-char) (:generator 5 - (inst mov al-tn - (make-ea :byte :base object :index index :scale 1 + (inst movzx value + (make-ea :word :base object :index index :scale 2 :disp (- (* vector-data-offset word-bytes) - other-pointer-type))) - (move value al-tn))) + other-pointer-type)))))
(define-vop (data-vector-ref-c/simple-string) (:translate data-vector-ref) @@ -1255,18 +1248,13 @@ (:args (object :scs (descriptor-reg))) (:info index) (:arg-types simple-string (:constant (signed-byte 30))) - (:temporary (:sc unsigned-reg :offset rax-offset :target value - :from (:eval 0) :to (:result 0)) - rax) - (:ignore rax) (:results (value :scs (base-char-reg))) (:result-types base-char) (:generator 4 - (inst mov al-tn - (make-ea :byte :base object - :disp (- (+ (* vector-data-offset word-bytes) index) - other-pointer-type))) - (move value al-tn))) + (inst movzx value + (make-ea :word :base object + :disp (- (+ (* vector-data-offset word-bytes) (* 2 index)) + other-pointer-type)))))
(define-vop (data-vector-set/simple-string) @@ -1276,14 +1264,18 @@ (index :scs (unsigned-reg) :to (:eval 0)) (value :scs (base-char-reg))) (:arg-types simple-string positive-fixnum base-char) - (:results (result :scs (base-char-reg))) + (:temporary (:sc unsigned-reg :offset rax-offset :target result + :from (:argument 2) :to (:result 0)) + rax) + (:results (result :scs (base-char-reg))) (:result-types base-char) (:generator 5 - (inst mov (make-ea :byte :base object :index index :scale 1 + (move rax value) + (inst mov (make-ea :word :base object :index index :scale 2 :disp (- (* vector-data-offset word-bytes) other-pointer-type)) - value) - (move result value))) + ax-tn) + (move result rax)))
(define-vop (data-vector-set/simple-string-c) @@ -1293,14 +1285,19 @@ (value :scs (base-char-reg))) (:info index) (:arg-types simple-string (:constant (signed-byte 30)) base-char) + (:temporary (:sc unsigned-reg :offset rax-offset :target result + :from (:argument 1) :to (:result 0)) + rax) (:results (result :scs (base-char-reg))) (:result-types base-char) (:generator 4 - (inst mov (make-ea :byte :base object - :disp (- (+ (* vector-data-offset word-bytes) index) - other-pointer-type)) - value) - (move result value))) + (move rax value) + (inst mov (make-ea :word :base object + :disp (- (+ (* vector-data-offset word-bytes) + (* 2 index)) + other-pointer-type)) + ax-tn) + (move result rax)))
;;; signed-byte-8
===================================== src/compiler/amd64/char.lisp ===================================== @@ -30,41 +30,31 @@ ;;; Move a tagged char to an untagged representation. ;;; (define-vop (move-to-base-char) - (:args (x :scs (any-reg control-stack) :target al)) - (:temporary (:sc byte-reg :offset al-offset - :from (:argument 0) :to (:eval 0)) al) - (:ignore al) - (:temporary (:sc byte-reg :offset ah-offset :target y - :from (:argument 0) :to (:result 0)) ah) - (:results (y :scs (base-char-reg base-char-stack))) - (:note "character untagging") + (:args (x :scs (any-reg control-stack) :target y)) + (:results (y :scs (base-char-reg))) + (:note _N"character untagging") (:generator 1 - (move rax-tn x) - (move y ah))) + (move y x) + (inst shr y type-bits))) ;;; (define-move-vop move-to-base-char :move - (any-reg control-stack) (base-char-reg base-char-stack)) + (any-reg control-stack) (base-char-reg))
;;; Move an untagged char to a tagged representation. ;;; (define-vop (move-from-base-char) - (:args (x :scs (base-char-reg base-char-stack) :target ah)) - (:temporary (:sc byte-reg :offset al-offset :target y - :from (:argument 0) :to (:result 0)) al) - (:temporary (:sc byte-reg :offset ah-offset - :from (:argument 0) :to (:result 0)) ah) - (:results (y :scs (any-reg descriptor-reg control-stack))) - (:note "character tagging") + (:args (x :scs (base-char-reg base-char-stack) :target y)) + (:results (y :scs (any-reg descriptor-reg))) + (:note _N"character tagging") (:generator 1 - (move ah x) ; maybe move char byte - (inst mov al base-char-type) ; #x86 to type bits - (inst and rax-tn #xffff) ; remove any junk bits - (move y rax-tn))) + (move y x) + (inst shl y type-bits) + (inst or y base-char-type)))
;;; (define-move-vop move-from-base-char :move - (base-char-reg base-char-stack) (any-reg descriptor-reg control-stack)) + (base-char-reg base-char-stack) (any-reg descriptor-reg))
;;; Move untagged base-char values. ;;; @@ -74,7 +64,7 @@ :load-if (not (location= x y)))) (:results (y :scs (base-char-reg base-char-stack) :load-if (not (location= x y)))) - (:note "character move") + (:note _N"character move") (:effects) (:affected) (:generator 0 @@ -92,15 +82,13 @@ (fp :scs (any-reg) :load-if (not (sc-is y base-char-reg)))) (:results (y)) - (:note "character arg move") + (:note _N"character arg move") (:generator 0 (sc-case y (base-char-reg (move y x)) (base-char-stack - (inst mov - (make-ea :byte :base fp :disp (- (* (1+ (tn-offset y)) word-bytes))) - x))))) + (storew x fp (- (1+ (tn-offset y)))))))) ;;; (define-move-vop move-base-char-argument :move-argument (any-reg base-char-reg) (base-char-reg)) @@ -119,29 +107,22 @@ (define-vop (char-code) (:translate char-code) (:policy :fast-safe) - (:args (ch :scs (base-char-reg base-char-stack))) + (:args (ch :scs (base-char-reg base-char-stack) :target res)) (:arg-types base-char) (:results (res :scs (unsigned-reg))) (:result-types positive-fixnum) (:generator 1 - ;; ah to dh are not addressable when a rex prefix is used - ;; The high 32 bits of doubleword operands are - ;; zero-extended to 64-bits. - (inst movzx (64-bit-to-32-bit-tn res) ch))) + (move res ch)))
(define-vop (code-char) (:translate code-char) (:policy :fast-safe) - (:args (code :scs (unsigned-reg unsigned-stack) :target rax)) + (:args (code :scs (unsigned-reg control-stack) :target res)) (:arg-types positive-fixnum) - (:temporary (:sc unsigned-reg :offset rax-offset :target res - :from (:argument 0) :to (:result 0)) - rax) (:results (res :scs (base-char-reg))) (:result-types base-char) (:generator 1 - (move rax code) - (move res al-tn))) + (move res code)))
;;; Comparison of base-chars. @@ -155,7 +136,7 @@ (:conditional) (:info target not-p) (:policy :fast-safe) - (:note "inline comparison") + (:note _N"inline comparison") (:variant-vars condition not-condition) (:generator 3 (inst cmp x y) @@ -179,7 +160,7 @@ (:conditional) (:info target not-p y) (:policy :fast-safe) - (:note "inline comparison") + (:note _N"inline comparison") (:variant-vars condition not-condition) (:generator 2 (inst cmp x (char-code y))
===================================== src/compiler/amd64/float-sse2.lisp ===================================== @@ -0,0 +1,2391 @@ +;;; -*- Mode: LISP; Syntax: Common-Lisp; Base: 10; Package: x86 -*- +;;; +;;; ********************************************************************** +;;; This code was written as part of the CMU Common Lisp project at +;;; Carnegie Mellon University, and has been placed in the public domain. +;;; If you want to use this code or any part of CMU Common Lisp, please contact +;;; Scott Fahlman or slisp-group@cs.cmu.edu. +;;; +(ext:file-comment + "$Header: src/compiler/x86/float-sse2.lisp $") +;;; +;;; ********************************************************************** +;;; +;;; This file contains floating point support for the x86. +;;; + +(in-package :amd64) +(intl:textdomain "cmucl-sse2") + +;;; Popping the FP stack. +;;; +;;; The default is to use a store and pop, fstp fr0. +;;; For the AMD Athlon, using ffreep fr0 is faster. +;;; +(defun fp-pop () + (if (backend-featurep :athlon) + (inst ffreep fr0-tn) + (inst fstp fr0-tn))) + + +(macrolet ((ea-for-xf-desc (tn slot) + `(make-ea + :dword :base ,tn + :disp (- (* ,slot vm:word-bytes) vm:other-pointer-type)))) + (defun ea-for-sf-desc (tn) + (ea-for-xf-desc tn vm:single-float-value-slot)) + (defun ea-for-df-desc (tn) + (ea-for-xf-desc tn vm:double-float-value-slot)) + #+long-float + (defun ea-for-lf-desc (tn) + (ea-for-xf-desc tn vm:long-float-value-slot)) + ;; Complex floats + (defun ea-for-csf-real-desc (tn) + (ea-for-xf-desc tn vm:complex-single-float-real-slot)) + (defun ea-for-csf-imag-desc (tn) + (ea-for-xf-desc tn vm:complex-single-float-imag-slot)) + (defun ea-for-cdf-real-desc (tn) + (ea-for-xf-desc tn vm:complex-double-float-real-slot)) + (defun ea-for-cdf-imag-desc (tn) + (ea-for-xf-desc tn vm:complex-double-float-imag-slot)) + #+long-float + (defun ea-for-clf-real-desc (tn) + (ea-for-xf-desc tn vm:complex-long-float-real-slot)) + #+long-float + (defun ea-for-clf-imag-desc (tn) + (ea-for-xf-desc tn vm:complex-long-float-imag-slot)) + #+double-double + (defun ea-for-cddf-real-hi-desc (tn) + (ea-for-xf-desc tn vm:complex-double-double-float-real-hi-slot)) + #+double-double + (defun ea-for-cddf-real-lo-desc (tn) + (ea-for-xf-desc tn vm:complex-double-double-float-real-lo-slot)) + #+double-double + (defun ea-for-cddf-imag-hi-desc (tn) + (ea-for-xf-desc tn vm:complex-double-double-float-imag-hi-slot)) + #+double-double + (defun ea-for-cddf-imag-lo-desc (tn) + (ea-for-xf-desc tn vm:complex-double-double-float-imag-lo-slot)) + ) + +(macrolet ((ea-for-xf-stack (tn kind) + `(make-ea + :dword :base rbp-tn + :disp (- (* (+ (tn-offset ,tn) + (ecase ,kind (:single 1) (:double 2) (:long 3))) + vm:word-bytes))))) + (defun ea-for-sf-stack (tn) + (ea-for-xf-stack tn :single)) + (defun ea-for-df-stack (tn) + (ea-for-xf-stack tn :double)) + #+long-float + (defun ea-for-lf-stack (tn) + (ea-for-xf-stack tn :long))) + +;;; Complex float stack EAs +(macrolet ((ea-for-cxf-stack (tn kind slot &optional base) + `(make-ea + :qword :base ,base + :disp (- (* (+ (tn-offset ,tn) + (* (ecase ,kind + (:single 1) + (:double 2) + (:long 3)) + (ecase ,slot + ;; We want the real part to be at + ;; the lower address! + (:real 2) + (:imag 1) + (:real-hi 1) + (:real-lo 2) + (:imag-hi 3) + (:imag-lo 4)))) + vm:word-bytes))))) + (defun ea-for-csf-real-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :single :real base)) + (defun ea-for-csf-imag-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :single :imag base)) + (defun ea-for-cdf-real-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :double :real base)) + (defun ea-for-cdf-imag-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :double :imag base)) + ;; + #+long-float + (defun ea-for-clf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :real base)) + #+long-float + (defun ea-for-clf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :imag base)) + + #+double-double + (defun ea-for-cddf-real-hi-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :double :real-hi base)) + #+double-double + (defun ea-for-cddf-real-lo-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :double :real-lo base)) + #+double-double + (defun ea-for-cddf-imag-hi-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :double :imag-hi base)) + #+double-double + (defun ea-for-cddf-imag-lo-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :double :imag-lo base)) + ) + +;;; The x86 can't store a long-float to memory without popping the +;;; stack and marking a register as empty, so it is necessary to +;;; restore the register from memory. +(defun store-long-float (ea) + (inst fstpl ea) + (inst fldl ea)) + + +;;;; Move functions: + +;;; x is source, y is destination +(define-move-function (load-single 2) (vop x y) + ((single-stack) (single-reg)) + (inst movss y (ea-for-sf-stack x))) + +(define-move-function (store-single 2) (vop x y) + ((single-reg) (single-stack)) + (inst movss (ea-for-sf-stack y) x)) + +(define-move-function (load-double 2) (vop x y) + ((double-stack) (double-reg)) + (inst movsd y (ea-for-df-stack x))) + +(define-move-function (store-double 2) (vop x y) + ((double-reg) (double-stack)) + (inst movsd (ea-for-df-stack y) x)) + +#+long-float +(define-move-function (load-long 2) (vop x y) + ((long-stack) (long-reg)) + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-stack x)))) + +#+long-float +(define-move-function (store-long 2) (vop x y) + ((long-reg) (long-stack)) + (cond ((zerop (tn-offset x)) + (store-long-float (ea-for-lf-stack y))) + (t + (inst fxch x) + (store-long-float (ea-for-lf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +(define-move-function (load-fp-constant 2) (vop x y) + ((fp-constant) (single-reg double-reg)) + (let ((value (c::constant-value (c::tn-leaf x)))) + (cond ((and (zerop value) + (= (float-sign value) 1)) + (sc-case y + (single-reg (inst xorps y y)) + (double-reg (inst xorpd y y)))) + (t + (warn (intl:gettext "Ignoring bogus i387 Constant ~a") value))))) + + +;;;; Complex float move functions + +(defun complex-single-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg *backend*) + :offset (tn-offset x))) +(defun complex-single-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg *backend*) + :offset (1+ (tn-offset x)))) + +(defun complex-double-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*) + :offset (tn-offset x))) +(defun complex-double-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*) + :offset (1+ (tn-offset x)))) + +#+long-float +(defun complex-long-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg *backend*) + :offset (tn-offset x))) +#+long-float +(defun complex-long-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg *backend*) + :offset (1+ (tn-offset x)))) + +#+double-double +(progn +(defun complex-double-double-reg-real-hi-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*) + :offset (tn-offset x))) +(defun complex-double-double-reg-real-lo-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*) + :offset (+ 1 (tn-offset x)))) +(defun complex-double-double-reg-imag-hi-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*) + :offset (+ 2 (tn-offset x)))) +(defun complex-double-double-reg-imag-lo-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*) + :offset (+ 3 (tn-offset x)))) +) +;;; x is source, y is destination +(define-move-function (load-complex-single 2) (vop x y) + ((complex-single-stack) (complex-single-reg)) + (inst movlps y (ea-for-csf-real-stack x))) + +(define-move-function (store-complex-single 2) (vop x y) + ((complex-single-reg) (complex-single-stack)) + (inst movlps (ea-for-csf-real-stack y) x)) + +(define-move-function (load-complex-double 2) (vop x y) + ((complex-double-stack) (complex-double-reg)) + (inst movupd y (ea-for-cdf-real-stack x))) + +(define-move-function (store-complex-double 2) (vop x y) + ((complex-double-reg) (complex-double-stack)) + (inst movupd (ea-for-cdf-real-stack y) x)) + +#+long-float +(define-move-function (load-complex-long 2) (vop x y) + ((complex-long-stack) (complex-long-reg)) + (let ((real-tn (complex-long-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldl (ea-for-clf-real-stack x)))) + (let ((imag-tn (complex-long-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldl (ea-for-clf-imag-stack x))))) + +#+long-float +(define-move-function (store-complex-long 2) (vop x y) + ((complex-long-reg) (complex-long-stack)) + (let ((real-tn (complex-long-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (store-long-float (ea-for-clf-real-stack y))) + (t + (inst fxch real-tn) + (store-long-float (ea-for-clf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (inst fxch imag-tn) + (store-long-float (ea-for-clf-imag-stack y)) + (inst fxch imag-tn))) + +#+double-double +(progn +(define-move-function (load-complex-double-double 4) (vop x y) + ((complex-double-double-stack) (complex-double-double-reg)) + (let ((real-tn (complex-double-double-reg-real-hi-tn y))) + (inst movsd real-tn (ea-for-cddf-real-hi-stack x))) + (let ((real-tn (complex-double-double-reg-real-lo-tn y))) + (inst movsd real-tn (ea-for-cddf-real-lo-stack x))) + (let ((imag-tn (complex-double-double-reg-imag-hi-tn y))) + (inst movsd imag-tn (ea-for-cddf-imag-hi-stack x))) + (let ((imag-tn (complex-double-double-reg-imag-lo-tn y))) + (inst movsd imag-tn (ea-for-cddf-imag-lo-stack x)))) + +(define-move-function (store-complex-double-double 4) (vop x y) + ((complex-double-double-reg) (complex-double-double-stack)) + ;; FIXME: These may not be right!!!! + (let ((real-tn (complex-double-double-reg-real-hi-tn x))) + (inst movsd (ea-for-cddf-real-hi-stack y) real-tn)) + (let ((real-tn (complex-double-double-reg-real-lo-tn x))) + (inst movsd (ea-for-cddf-real-lo-stack y) real-tn)) + (let ((imag-tn (complex-double-double-reg-imag-hi-tn x))) + (inst movsd (ea-for-cddf-imag-hi-stack y) imag-tn)) + (let ((imag-tn (complex-double-double-reg-imag-lo-tn x))) + (inst movsd (ea-for-cddf-imag-lo-stack y) imag-tn))) + +) + +;;;; Move VOPs: + +;;; +;;; Float register to register moves. +;;; +#+nil +(define-vop (float-move) + (:args (x)) + (:results (y)) + (:note _N"float move") + (:generator 0 + (unless (location= x y) + (inst movq y x)))) + +(define-vop (float-move/single) + (:args (x)) + (:results (y)) + (:note _N"float move") + (:temporary (:sc single-stack) temp) + (:generator 0 + (unless (location= x y) + (let ((x-offset (tn-offset x)) + (y-offset (tn-offset y))) + (cond ((and (zerop x-offset) + (>= y-offset 8)) + ;; Move fr0 to xmm + (inst fst (ea-for-sf-stack temp)) + (inst movss y (ea-for-sf-stack temp))) + ((and (>= x-offset 8) + (>= y-offset 8)) + (inst movq y x)) + (t + (error "Don't know how to move ~S to ~S" x y))))))) + +(define-vop (float-move/double) + (:args (x)) + (:results (y)) + (:note _N"float move") + (:temporary (:sc double-stack) temp) + (:generator 0 + (unless (location= x y) + (let ((x-offset (tn-offset x)) + (y-offset (tn-offset y))) + (cond ((and (zerop x-offset) + (>= y-offset 8)) + ;; Move fr0 to xmm + (inst fstd (ea-for-df-stack temp)) + (inst movsd y (ea-for-df-stack temp))) + ((and (>= x-offset 8) + (>= y-offset 8)) + (inst movq y x)) + (t + (error "Don't know how to move ~S to ~S" x y))))))) + +(define-vop (single-move float-move/single) + (:args (x :scs (single-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (single-reg) :load-if (not (location= x y))))) + +(define-move-vop single-move :move (single-reg) (single-reg)) + +(define-vop (double-move float-move/double) + (:args (x :scs (double-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (double-reg) :load-if (not (location= x y))))) +(define-move-vop double-move :move (double-reg) (double-reg)) + +#+long-float +(define-vop (long-move float-move) + (:args (x :scs (long-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (long-reg) :load-if (not (location= x y))))) +#+long-float +(define-move-vop long-move :move (long-reg) (long-reg)) + +;;; +;;; Complex float register to register moves. +;;; +(define-vop (complex-single-move) + (:args (x :scs (complex-single-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (complex-single-reg) :load-if (not (location= x y)))) + (:generator 0 + (unless (location= x y) + (inst movaps y x)))) + +(define-move-vop complex-single-move :move + (complex-single-reg) (complex-single-reg)) + +(define-vop (complex-double-move) + (:args (x :scs (complex-double-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-double-reg) :load-if (not (location= x y)))) + (:generator 0 + (unless (location= x y) + (inst movapd y x)))) + +(define-move-vop complex-double-move :move + (complex-double-reg) (complex-double-reg)) + + +#+long-float +(define-vop (complex-long-move complex-float-move) + (:args (x :scs (complex-long-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-long-reg) :load-if (not (location= x y))))) +#+long-float +(define-move-vop complex-long-move :move + (complex-long-reg) (complex-long-reg)) + + +;;; +;;; Move from float to a descriptor reg. allocating a new float +;;; object in the process. +;;; +(define-vop (move-from-single) + (:args (x :scs (single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note _N"float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y vm:single-float-type vm:single-float-size node) + (inst movss (ea-for-sf-desc y) x)))) +(define-move-vop move-from-single :move + (single-reg) (descriptor-reg)) + +(define-vop (move-from-double) + (:args (x :scs (double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note _N"float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y vm:double-float-type vm:double-float-size node) + (inst movsd (ea-for-df-desc y) x)))) +(define-move-vop move-from-double :move + (double-reg) (descriptor-reg)) + +#+long-float +(define-vop (move-from-long) + (:args (x :scs (long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note _N"float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y vm:long-float-type vm:long-float-size node) + (with-tn@fp-top(x) + (store-long-float (ea-for-lf-desc y)))))) +#+long-float +(define-move-vop move-from-long :move + (long-reg) (descriptor-reg)) + +(define-vop (move-from-fp-constant) + (:args (x :scs (fp-constant))) + (:results (y :scs (descriptor-reg))) + (:generator 2 + (ecase (c::constant-value (c::tn-leaf x)) + (0f0 (load-symbol-value y *fp-constant-0s0*)) + #+nil + (1f0 (load-symbol-value y *fp-constant-1s0*)) + (0d0 (load-symbol-value y *fp-constant-0d0*)) + #+nil + (1d0 (load-symbol-value y *fp-constant-1d0*))))) +(define-move-vop move-from-fp-constant :move + (fp-constant) (descriptor-reg)) + +;;; +;;; Move from a descriptor to a float register +;;; +(define-vop (move-to-single) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (single-reg))) + (:note _N"pointer to float coercion") + (:generator 2 + (inst movss y (ea-for-sf-desc x)))) +(define-move-vop move-to-single :move (descriptor-reg) (single-reg)) + +(define-vop (move-to-double) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (double-reg))) + (:note _N"pointer to float coercion") + (:generator 2 + (inst movsd y (ea-for-df-desc x)))) +(define-move-vop move-to-double :move (descriptor-reg) (double-reg)) + +#+long-float +(define-vop (move-to-long) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (long-reg))) + (:note _N"pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-desc x))))) +#+long-float +(define-move-vop move-to-long :move (descriptor-reg) (long-reg)) + + +;;; +;;; Move from complex float to a descriptor reg. allocating a new +;;; complex float object in the process. +;;; +(define-vop (move-from-complex-single) + (:args (x :scs (complex-single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note _N"complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y vm:complex-single-float-type + vm:complex-single-float-size node) + (inst movlps (ea-for-csf-real-desc y) x)))) +(define-move-vop move-from-complex-single :move + (complex-single-reg) (descriptor-reg)) + +(define-vop (move-from-complex-double) + (:args (x :scs (complex-double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note _N"complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y vm:complex-double-float-type + vm:complex-double-float-size node) + (inst movupd (ea-for-cdf-real-desc y) x)))) + +(define-move-vop move-from-complex-double :move + (complex-double-reg) (descriptor-reg)) + +#+long-float +(define-vop (move-from-complex-long) + (:args (x :scs (complex-long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note _N"complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y vm:complex-long-float-type + vm:complex-long-float-size node) + (let ((real-tn (complex-long-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (store-long-float (ea-for-clf-real-desc y)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (store-long-float (ea-for-clf-imag-desc y))))))) +#+long-float +(define-move-vop move-from-complex-long :move + (complex-long-reg) (descriptor-reg)) + +#+double-double +(define-vop (move-from-complex-double-double) + (:args (x :scs (complex-double-double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note _N"complex double-double float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y vm::complex-double-double-float-type + vm::complex-double-double-float-size node) + (let ((real-tn (complex-double-double-reg-real-hi-tn x))) + (inst movsd (ea-for-cddf-real-hi-desc y) real-tn)) + (let ((real-tn (complex-double-double-reg-real-lo-tn x))) + (inst movsd (ea-for-cddf-real-lo-desc y) real-tn)) + (let ((imag-tn (complex-double-double-reg-imag-hi-tn x))) + (inst movsd (ea-for-cddf-imag-hi-desc y) imag-tn)) + (let ((imag-tn (complex-double-double-reg-imag-lo-tn x))) + (inst movsd (ea-for-cddf-imag-lo-desc y) imag-tn))))) +;;; +#+double-double +(define-move-vop move-from-complex-double-double :move + (complex-double-double-reg) (descriptor-reg)) +;;; +;;; Move from a descriptor to a complex float register +;;; +(define-vop (move-to-complex-single) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (complex-single-reg))) + (:note _N"pointer to complex float coercion") + (:generator 2 + (inst movlps y (ea-for-csf-real-desc x)))) + +(define-move-vop move-to-complex-single :move + (descriptor-reg) (complex-single-reg)) + +(define-vop (move-to-complex-double) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (complex-double-reg))) + (:note _N"pointer to complex float coercion") + (:generator 2 + (inst movupd y (ea-for-cdf-real-desc x)))) + +(define-move-vop move-to-complex-double :move + (descriptor-reg) (complex-double-reg)) + + +;;; +;;; The move argument vops. +;;; +;;; Note these are also used to stuff fp numbers onto the c-call stack +;;; so the order is different than the lisp-stack. + +;;; The general move-argument vop +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note _N"float argument move") + (:generator ,(case format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (inst movq y x))) + (,stack-sc + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) word-bytes)) + (ea (make-ea :dword :base fp :disp offset))) + ,@(ecase format + (:single '((inst movss ea x))) + (:double '((inst movsd ea x))))) + (let ((ea (make-ea + :dword :base fp + :disp (- (* (+ (tn-offset y) + ,(case format + (:single 1) + (:double 2) + (:long 3))) + vm:word-bytes))))) + ,@(ecase format + (:single '((inst movss ea x))) + (:double '((inst movsd ea x)))))))))) + (define-move-vop ,name :move-argument + (,sc descriptor-reg) (,sc))))) + (frob move-single-float-argument single-reg single-stack :single) + (frob move-double-float-argument double-reg double-stack :double)) + +;;;; Complex float move-argument vop +(define-vop (move-complex-single-float-argument) + (:args (x :scs (complex-single-reg) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y complex-single-reg)))) + (:results (y)) + (:note _N"complex float argument move") + (:generator 3 + (sc-case y + (complex-single-reg + (unless (location= x y) + (inst movaps y x))) + (complex-single-stack + (inst movlps (ea-for-csf-real-stack y fp) x))))) + +(define-move-vop move-complex-single-float-argument :move-argument + (complex-single-reg descriptor-reg) (complex-single-reg)) + +(define-vop (move-complex-double-float-argument) + (:args (x :scs (complex-double-reg) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y complex-double-reg)))) + (:results (y)) + (:note _N"complex float argument move") + (:generator 3 + (sc-case y + (complex-double-reg + (unless (location= x y) + (inst movapd y x))) + (complex-double-stack + (inst movupd (ea-for-cdf-real-stack y fp) x))))) + +(define-move-vop move-complex-double-float-argument :move-argument + (complex-double-reg descriptor-reg) (complex-double-reg)) + +#+double-double +(define-vop (move-complex-double-double-float-argument) + (:args (x :scs (complex-double-double-reg) :target y) + (fp :scs (any-reg) :load-if (not (sc-is y complex-double-double-reg)))) + (:results (y)) + (:note _N"complex double-double-float argument move") + (:generator 2 + (sc-case y + (complex-double-double-reg + (unless (location= x y) + (let ((x-real (complex-double-double-reg-real-hi-tn x)) + (y-real (complex-double-double-reg-real-hi-tn y))) + (inst movsd y-real x-real)) + (let ((x-real (complex-double-double-reg-real-lo-tn x)) + (y-real (complex-double-double-reg-real-lo-tn y))) + (inst movsd y-real x-real)) + (let ((x-imag (complex-double-double-reg-imag-hi-tn x)) + (y-imag (complex-double-double-reg-imag-hi-tn y))) + (inst movsd y-imag x-imag)) + (let ((x-imag (complex-double-double-reg-imag-lo-tn x)) + (y-imag (complex-double-double-reg-imag-lo-tn y))) + (inst movsd y-imag x-imag)))) + (complex-double-double-stack + (let ((real-tn (complex-double-double-reg-real-hi-tn x))) + (inst movsd (ea-for-cddf-real-hi-stack y fp) real-tn)) + (let ((real-tn (complex-double-double-reg-real-lo-tn x))) + (inst movsd (ea-for-cddf-real-lo-stack y fp) real-tn)) + (let ((imag-tn (complex-double-double-reg-imag-hi-tn x))) + (inst movsd (ea-for-cddf-imag-hi-stack y fp) imag-tn)) + (let ((imag-tn (complex-double-double-reg-imag-lo-tn x))) + (inst movsd (ea-for-cddf-imag-lo-stack y fp) imag-tn)))) + )) + +#+double-double +(define-move-vop move-complex-double-double-float-argument :move-argument + (complex-double-double-reg descriptor-reg) (complex-double-double-reg)) + +(define-move-vop move-argument :move-argument + (single-reg double-reg #+long-float long-reg + #+double-double double-double-reg + complex-single-reg complex-double-reg #+long-float complex-long-reg + #+double-double complex-double-double-reg) + (descriptor-reg)) + + +;;;; Arithmetic VOPs: + + +;;; dtc: The floating point arithmetic vops. +;;; +;;; Note: Although these can accept x and y on the stack or pointed to +;;; from a descriptor register, they will work with register loading +;;; without these. Same deal with the result - it need only be a +;;; register. When load-tns are needed they will probably be in ST0 +;;; and the code below should be able to correctly handle all cases. +;;; +;;; However it seems to produce better code if all arg. and result +;;; options are used; on the P86 there is no extra cost in using a +;;; memory operand to the FP instructions - not so on the PPro. +;;; +;;; It may also be useful to handle constant args? +;;; +;;; 22-Jul-97: descriptor args lose in some simple cases when +;;; a function result computed in a loop. Then Python insists +;;; on consing the intermediate values! For example +#| +(defun test(a n) + (declare (type (simple-array double-float (*)) a) + (fixnum n)) + (let ((sum 0d0)) + (declare (type double-float sum)) + (dotimes (i n) + (incf sum (* (aref a i)(aref a i)))) + sum)) +|# +;;; So, disabling descriptor args until this can be fixed elsewhere. +;;; + +(define-vop (float-op) + (:args (x) (y)) + (:results (r)) + (:policy :fast-safe) + (:note _N"inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only)) + +(macrolet ((frob (name sc ptype) + `(define-vop (,name float-op) + (:args (x :scs (,sc) :target r) + (y :scs (,sc))) + (:results (r :scs (,sc))) + (:arg-types ,ptype ,ptype) + (:result-types ,ptype)))) + (frob single-float-op single-reg single-float) + (frob double-float-op double-reg double-float)) + +(macrolet ((generate (movinst opinst commutative arg-type) + (multiple-value-bind (rtype stack-sc ea ea-stack) + (if (eq arg-type 'single) + (values 'single-reg 'single-stack 'ea-for-sf-desc 'ea-for-sf-stack) + (values 'double-reg 'double-stack 'ea-for-df-desc 'ea-for-df-stack)) + `(progn + (cond + ((location= x r) + ;; x and r are the same. We can just operate on x, + ;; and we're done. + (sc-case y + (,rtype + (inst ,opinst x y)) + (descriptor-reg + (inst ,opinst x (,ea y))) + (,stack-sc + (inst ,opinst x (,ea-stack y))))) + ((and ,commutative (location= y r)) + ;; y = r and the operation is commutative, so just + ;; do the operation with r and x. + (inst ,opinst y x)) + ((not (location= r y)) + ;; x, y, and r are three different regs. So just + ;; move r to x and do the operation on r. + (inst ,movinst r x) + (sc-case y + (,rtype + (inst ,opinst r y)) + (descriptor-reg + (inst ,opinst r (,ea y))) + (,stack-sc + (inst, opinst r (,ea-stack y))))) + (t + ;; The hard case where the operation is not + ;; commutative, but y might be r. Don't want to + ;; destroy y in this case, so use a temp so we + ;; don't accidentally overwrite y. + (inst ,movinst tmp x) + (sc-case y + (,rtype + (inst ,opinst tmp y)) + (descriptor-reg + (inst ,opinst tmp (,ea y))) + (,stack-sc + (inst, opinst tmp (,ea-stack y)))) + (inst ,movinst r tmp)))))) + (frob (op sinst sname scost dinst dname dcost commutative) + `(progn + (define-vop (,sname single-float-op) + (:args (x :scs (single-reg) :target r) + (y :scs (single-reg descriptor-reg) + :load-if (not (sc-is y single-stack)))) + (:translate ,op) + (:temporary (:sc single-reg) tmp) + (:generator ,scost + (generate movss ,sinst ,commutative single))) + (define-vop (,dname double-float-op) + (:args (x :scs (double-reg) :target r) + (y :scs (double-reg descriptor-reg) + :load-if (not (sc-is y double-stack)))) + (:translate ,op) + (:temporary (:sc double-reg) tmp) + (:generator ,dcost + (generate movsd ,dinst ,commutative double)))))) + (frob + addss +/single-float 2 addsd +/double-float 2 t) + (frob - subss -/single-float 2 subsd -/double-float 2 nil) + (frob * mulss */single-float 4 mulsd */double-float 5 t) + (frob / divss //single-float 12 divsd //double-float 19 nil)) + +(define-vop (fsqrt) + (:args (x :scs (double-reg))) + (:results (y :scs (double-reg))) + (:translate %sqrt) + (:policy :fast-safe) + (:arg-types double-float) + (:result-types double-float) + (:note _N"inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 1 + (note-this-location vop :internal-error) + (inst sqrtsd y x))) + +(macrolet ((frob ((name translate mov sc type) &body body) + `(define-vop (,name) + (:args (x :scs (,sc))) + (:results (y :scs (,sc))) + (:translate ,translate) + (:policy :fast-safe) + (:arg-types ,type) + (:result-types ,type) + (:temporary (:sc ,sc) tmp) + (:note _N"inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 1 + (note-this-location vop :internal-error) + (inst pcmpeqd tmp tmp) ; all 1's + ;; we should be able to do this better. what we + ;; really would like to do is use the target as the + ;; temp whenever it's not also the source + (unless (location= x y) + (inst ,mov y x)) + ,@body)))) + (frob (%negate/double-float %negate movsd double-reg double-float) + (inst psllq tmp 63) ; tmp = #x8000000000000000 + (inst xorpd y tmp)) + (frob (%negate/single-float %negate movss single-reg single-float) + (inst pslld tmp 31) ; tmp = #x80000000 + (inst xorps y tmp)) + (frob (abs/double-float abs movsd double-reg double-float) + (inst psrlq tmp 1) ; tmp = #x7fffffffffffffff + (inst andpd y tmp)) + (frob (abs/single-float abs movss single-reg single-float) + (inst psrld tmp 1) ; tmp = #x7fffffff + (inst andps y tmp))) + + +;;;; Comparison: + +#+long-float +(deftransform eql ((x y) (long-float long-float)) + `(and (= (long-float-low-bits x) (long-float-low-bits y)) + (= (long-float-high-bits x) (long-float-high-bits y)) + (= (long-float-exp-bits x) (long-float-exp-bits y)))) + +#+double-double +(deftransform eql ((x y) (double-double-float double-double-float)) + '(and (eql (double-double-hi x) (double-double-hi y)) + (eql (double-double-lo x) (double-double-lo y)))) + + +;;;; comparison + +(define-vop (float-compare) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note _N"inline float comparison")) + +;;; comiss and comisd can cope with one or other arg in memory: we +;;; could (should, indeed) extend these to cope with descriptor args +;;; and stack args + +(define-vop (single-float-compare float-compare) + (:args (x :scs (single-reg)) (y :scs (single-reg descriptor-reg))) + (:conditional) + (:arg-types single-float single-float)) +(define-vop (double-float-compare float-compare) + (:args (x :scs (double-reg)) (y :scs (double-reg descriptor-reg))) + (:conditional) + (:arg-types double-float double-float)) + +(define-vop (=/single-float single-float-compare) + (:translate =) + (:info target not-p) + (:vop-var vop) + (:generator 3 + (note-this-location vop :internal-error) + (sc-case y + (single-reg + (inst ucomiss x y)) + (descriptor-reg + (inst ucomiss x (ea-for-sf-desc y)))) + ;; if PF&CF, there was a NaN involved => not equal + ;; otherwise, ZF => equal + (cond (not-p + (inst jmp :p target) + (inst jmp :ne target)) + (t + (let ((not-lab (gen-label))) + (inst jmp :p not-lab) + (inst jmp :e target) + (emit-label not-lab)))))) + +(define-vop (=/double-float double-float-compare) + (:translate =) + (:info target not-p) + (:vop-var vop) + (:generator 3 + (note-this-location vop :internal-error) + (sc-case y + (double-reg + (inst ucomisd x y)) + (descriptor-reg + (inst ucomisd x (ea-for-df-desc y)))) + (cond (not-p + (inst jmp :p target) + (inst jmp :ne target)) + (t + (let ((not-lab (gen-label))) + (inst jmp :p not-lab) + (inst jmp :e target) + (emit-label not-lab)))))) + +(define-vop (</double-float double-float-compare) + (:translate <) + (:info target not-p) + (:generator 3 + (sc-case y + (double-reg + (inst comisd x y)) + (descriptor-reg + (inst comisd x (ea-for-df-desc y)))) + (cond (not-p + (inst jmp :p target) + (inst jmp :nc target)) + (t + (let ((not-lab (gen-label))) + (inst jmp :p not-lab) + (inst jmp :c target) + (emit-label not-lab)))))) + +(define-vop (</single-float single-float-compare) + (:translate <) + (:info target not-p) + (:generator 3 + (sc-case y + (single-reg + (inst comiss x y)) + (descriptor-reg + (inst comiss x (ea-for-sf-desc y)))) + (cond (not-p + (inst jmp :p target) + (inst jmp :nc target)) + (t + (let ((not-lab (gen-label))) + (inst jmp :p not-lab) + (inst jmp :c target) + (emit-label not-lab)))))) + +(define-vop (>/double-float double-float-compare) + (:translate >) + (:info target not-p) + (:generator 3 + (sc-case y + (double-reg + (inst comisd x y)) + (descriptor-reg + (inst comisd x (ea-for-df-desc y)))) + (cond (not-p + (inst jmp :p target) + (inst jmp :na target)) + (t + (let ((not-lab (gen-label))) + (inst jmp :p not-lab) + (inst jmp :a target) + (emit-label not-lab)))))) + +(define-vop (>/single-float single-float-compare) + (:translate >) + (:info target not-p) + (:generator 3 + (sc-case y + (single-reg + (inst comiss x y)) + (descriptor-reg + (inst comiss x (ea-for-sf-desc y)))) + (cond (not-p + (inst jmp :p target) + (inst jmp :na target)) + (t + (let ((not-lab (gen-label))) + (inst jmp :p not-lab) + (inst jmp :a target) + (emit-label not-lab)))))) + + + +;;;; Conversion: + +(macrolet ((frob (name translate inst to-sc to-type) + `(define-vop (,name) + (:args (x :scs (signed-stack signed-reg))) + (:results (y :scs (,to-sc))) + (:arg-types signed-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note _N"inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (sc-case x + (signed-reg + (note-this-location vop :internal-error) + (inst ,inst y x)) + (signed-stack + (note-this-location vop :internal-error) + (inst ,inst y x))))))) + (frob %single-float/signed %single-float cvtsi2ss single-reg single-float) + (frob %double-float/signed %double-float cvtsi2sd double-reg double-float)) + +(macrolet ((frob (name translate inst from-sc from-type to-sc to-type) + (let ((ea (if (eq from-sc 'single-reg) + 'ea-for-sf-desc + 'ea-for-df-desc))) + `(define-vop (,name) + (:args (x :scs (,from-sc descriptor-reg) :target y)) + (:results (y :scs (,to-sc))) + (:arg-types ,from-type) + (:result-types ,to-type) + (:policy :fast-safe) + (:note _N"inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 2 + (note-this-location vop :internal-error) + (sc-case x + (,from-sc + (inst ,inst y x)) + (descriptor-reg + (inst ,inst y (,ea x))))))))) + (frob %single-float/double-float %single-float cvtsd2ss double-reg + double-float single-reg single-float) + + (frob %double-float/single-float %double-float cvtss2sd + single-reg single-float double-reg double-float)) + +(macrolet ((frob (trans inst from-sc from-type round-p) + (declare (ignore round-p)) + (let ((ea (if (eq from-sc 'single-reg) + 'ea-for-sf-desc + 'ea-for-df-desc))) + `(define-vop (,(symbolicate trans "/" from-type)) + (:args (x :scs (,from-sc descriptor-reg))) + (:temporary (:sc any-reg) temp-reg) + (:results (y :scs (signed-reg))) + (:arg-types ,from-type) + (:result-types signed-num) + (:translate ,trans) + (:policy :fast-safe) + (:note _N"inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (sc-case y + (signed-stack + (sc-case x + (,from-sc + (inst ,inst temp-reg x)) + (descriptor-reg + (inst ,inst temp-reg (,ea x)))) + (move y temp-reg)) + (signed-reg + (sc-case x + (,from-sc + (inst ,inst y x)) + (descriptor-reg + (inst ,inst y (,ea x))))))))))) + (frob %unary-truncate cvttss2si single-reg single-float nil) + (frob %unary-truncate cvttsd2si double-reg double-float nil) + + (frob %unary-round cvtss2si single-reg single-float t) + (frob %unary-round cvtsd2si double-reg double-float t)) + +(define-vop (fast-unary-ftruncate/single-float) + (:args (x :scs (single-reg descriptor-reg))) + (:arg-types single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:policy :fast-safe) + (:translate c::fast-unary-ftruncate) + (:temporary (:sc signed-reg) temp) + (:note _N"inline ftruncate") + (:generator 2 + (sc-case x + (single-reg + (inst cvttss2si temp x)) + (descriptor-reg + (inst cvttss2si temp (ea-for-sf-desc x)))) + (inst cvtsi2ss r temp))) + +(define-vop (fast-unary-ftruncate/double-float) + (:args (x :scs (double-reg descriptor-reg) :target r)) + (:arg-types double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:policy :fast-safe) + (:translate c::fast-unary-ftruncate) + (:temporary (:sc signed-reg) temp) + (:note _N"inline ftruncate") + (:generator 2 + (sc-case x + (double-reg + (inst cvttsd2si temp x)) + (descriptor-reg + (inst cvttsd2si temp (ea-for-df-desc x)))) + (inst cvtsi2sd r temp))) + +(define-vop (make-single-float) + (:args (bits :scs (signed-reg) :target res + :load-if (not (or (and (sc-is bits signed-stack) + (sc-is res single-reg)) + (and (sc-is bits signed-stack) + (sc-is res single-stack) + (location= bits res)))))) + (:results (res :scs (single-reg single-stack))) + (:arg-types signed-num) + (:result-types single-float) + (:translate make-single-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case res + (single-stack + (sc-case bits + (signed-reg + (inst mov res bits)) + (signed-stack + (assert (location= bits res))))) + (single-reg + (sc-case bits + (signed-reg + (inst movd res bits)) + (signed-stack + (inst movd res bits))))))) + +(define-vop (make-double-float) + (:args (hi-bits :scs (signed-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (double-reg))) + (:arg-types signed-num unsigned-num) + (:result-types double-float) + (:translate make-double-float) + (:temporary (:sc double-reg) temp) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (inst movd temp hi-bits) + (inst psllq temp 32) + (inst movd res lo-bits) + (inst orpd res temp))) + +(define-vop (single-float-bits) + (:args (float :scs (single-reg descriptor-reg) + :load-if (not (sc-is float single-stack)))) + (:results (bits :scs (signed-reg))) + (:arg-types single-float) + (:result-types signed-num) + (:translate single-float-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case bits + (signed-reg + (sc-case float + (single-reg + (inst movd bits float)) + (single-stack + (move bits float)) + (descriptor-reg + (loadw + bits float vm:single-float-value-slot vm:other-pointer-type)))) + (signed-stack + (sc-case float + (single-reg + (inst movss bits float))))))) + +(define-vop (double-float-high-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (hi-bits :scs (signed-reg))) + (:temporary (:sc double-reg) temp) + (:arg-types double-float) + (:result-types signed-num) + (:translate double-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (inst movq temp float) + (inst psrlq temp 32) + (inst movd hi-bits temp)) + (double-stack + (loadw hi-bits ebp-tn (- (1+ (tn-offset float))))) + (descriptor-reg + (loadw hi-bits float (1+ double-float-value-slot) + vm:other-pointer-type))))) + +(define-vop (double-float-low-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:arg-types double-float) + (:result-types unsigned-num) + (:translate double-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (inst movd lo-bits float)) + (double-stack + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float))))) + (descriptor-reg + (loadw lo-bits float vm:double-float-value-slot + vm:other-pointer-type))))) + +(define-vop (double-float-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)) + :to (:result 1))) + (:results (hi-bits :scs (signed-reg)) + (lo-bits :scs (unsigned-reg))) + (:arg-types double-float) + (:result-types signed-num unsigned-num) + (:temporary (:sc double-reg) temp) + (:translate kernel::double-float-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (inst movq temp float) + (inst movd lo-bits temp) + (inst psrlq temp 32) + (inst movd hi-bits temp)) + (double-stack + (loadw hi-bits ebp-tn (- (+ 1 (tn-offset float)))) + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float))))) + (descriptor-reg + (loadw hi-bits float (1+ double-float-value-slot) + vm:other-pointer-type) + (loadw lo-bits float vm:double-float-value-slot + vm:other-pointer-type))))) + + +;;;; Float mode hackery: + +(deftype float-modes () '(unsigned-byte 24)) + +;; For the record, here is the format of the MXCSR register. +;; +;; Bit +;; 31-16 Reserved +;; 15 Flush to zero +;; 14-13 Rounding control +;; 12 precision mask (inexact) +;; 11 underflow mask +;; 10 overflow mask +;; 9 divide-by-zero mask +;; 8 denormal operation mask +;; 7 invalid operation mask +;; 6 denormals-are-zeros +;; 5 precision flag (inexact) +;; 4 underflow flag +;; 3 overflow flag +;; 2 divide-by-zero flag +;; 1 denormal operation flag +;; 0 invalid operation flag +;; +;; See below for rounding control +(defknown sse2-floating-point-modes () float-modes (flushable)) +(defknown ((setf sse2-floating-point-modes)) (float-modes) float-modes) + +;; Returns exactly the mxcsr register, except the masks are flipped +;; because we want exception enable flags, not masks. +(define-vop (sse2-floating-point-modes) + (:results (result :scs (unsigned-reg))) + (:result-types unsigned-num) + (:translate sse2-floating-point-modes) + (:policy :fast-safe) + (:temporary (:sc unsigned-stack) temp) + (:generator 3 + (inst stmxcsr temp) + (inst mov result temp) + (inst xor result (ash #x3f 7)))) + +;; Set mxcsr exactly to whatever is given, except we invert the +;; exception enable flags to make them match the exception mask flags. +(define-vop (set-sse2-floating-point-modes) + (:args (new :scs (unsigned-reg) :to :result :target res)) + (:arg-types unsigned-num) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:translate (setf sse2-floating-point-modes)) + (:policy :fast-safe) + (:temporary (:sc unsigned-stack) cw-stack) + (:temporary (:sc unsigned-reg) temp) + (:generator 8 + ;; The high 16 bits are reserved and will cause a segfault if set, + ;; so clear out those bits. + (inst mov temp new) + (inst and temp #xffff) + (inst xor temp (ash #x3f 7)) ; Convert enables to masks + (inst mov cw-stack temp) + (inst ldmxcsr cw-stack) + (move res new))) + +;; For the record here is the format of the x87 control and status +;; words: +;; +;; Status word: +;; +;; Bit +;; 15 FPU Busy +;; 14 Condition code C3 +;; 13-11 top of stack +;; 10 Condition code C2 +;; 9 Condition code C1 +;; 8 Condition code C0 +;; 7 Error summary status +;; 6 Stack fault +;; 5 precision flag (inexact) +;; 4 underflow flag +;; 3 overflow flag +;; 2 divide-by-zero flag +;; 1 denormal operation flag +;; 0 invalid operation flag +;; +;; Control word +;; +;; Bit +;; 15-13 Reserved +;; 12 Infinity control +;; 11-10 Rounding control +;; 9-8 Precision control +;; 7-6 Reserved +;; 5 precision mask (inexact) +;; 4 underflow mask +;; 3 overflow mask +;; 2 divide-by-zero mask +;; 1 denormal operation mask +;; 0 invalid operation mask +;; +;; Round control: +;; +;; 00 nearest +;; 01 negative infinity +;; 10 positive infinity +;; 11 zero (truncate) +;; +;; Precision control +;; +;; 00 single precision (24 bits) +;; 01 reserved +;; 10 double precision (53 bits) +;; 11 double extended precision (64 bits) + +(defknown x87-floating-point-modes () float-modes (flushable)) +(defknown ((setf x87-floating-point-modes)) (float-modes) + float-modes) + +;; Extract the control and status words from the FPU. The low 16 bits +;; contain the control word, and the high 16 bits contain the status. +(define-vop (x87-floating-point-modes) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:translate x87-floating-point-modes) + (:policy :fast-safe) + (:temporary (:sc unsigned-stack) cw-stack) + (:temporary (:sc unsigned-reg :offset eax-offset) sw-reg) + (:generator 8 + (inst fnstsw) + (inst fnstcw cw-stack) + (inst and sw-reg #xff) ; mask exception flags + (inst shl sw-reg 16) + (inst byte #x66) ; operand size prefix + (inst or sw-reg cw-stack) + (inst xor sw-reg #x3f) ; invert exception mask + (move res sw-reg))) + +;; Set the control and status words from the FPU. The low 16 bits +;; contain the control word, and the high 16 bits contain the status. +(define-vop (x87-set-floating-point-modes) + (:args (new :scs (unsigned-reg) :to :result :target res)) + (:results (res :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:result-types unsigned-num) + (:translate (setf x87-floating-point-modes)) + (:policy :fast-safe) + (:temporary (:sc unsigned-stack) cw-stack) + (:temporary (:sc byte-reg :offset al-offset) sw-reg) + (:temporary (:sc unsigned-reg :offset ecx-offset) old) + (:generator 6 + (inst mov cw-stack new) + (inst xor cw-stack #x3f) ; invert exception mask + (inst fnstsw) + (inst fldcw cw-stack) ; always update the control word + (inst mov old new) + (inst shr old 16) + (inst cmp cl-tn sw-reg) ; compare exception flags + (inst jmp :z DONE) ; skip updating the status word + (inst sub esp-tn 28) + (inst fstenv (make-ea :dword :base esp-tn)) + (inst mov (make-ea :byte :base esp-tn :disp 4) cl-tn) + (inst fldenv (make-ea :dword :base esp-tn)) + (inst add esp-tn 28) + DONE + (move res new))) + + +(defun sse2-floating-point-modes () + (sse2-floating-point-modes)) +(defun (setf sse2-floating-point-modes) (new) + (setf (sse2-floating-point-modes) new)) + +(defun x87-floating-point-modes () + (x87-floating-point-modes)) +(defun (setf x87-floating-point-modes) (new) + (setf (x87-floating-point-modes) new)) + + +;;;; Complex float VOPs +(define-vop (make-complex-single-float) + (:translate complex) + (:args (real :scs (single-reg) :to :save) + (imag :scs (single-reg) :to :save)) + (:arg-types single-float single-float) + (:results (r :scs (complex-single-reg) :from (:argument 0) + :load-if (not (sc-is r complex-single-stack)))) + (:result-types complex-single-float) + (:temporary (:sc complex-single-reg) temp) + (:note _N"inline complex single-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-single-reg + ;; x = a + b*i = b|a + (inst xorps temp temp) ; temp = 0|0|0|0 + (inst movss temp real) ; temp = 0|0|0|a + (inst unpcklps temp imag) ; temp = 0|0|b|a + (inst movaps r temp)) + (complex-single-stack + (inst movss (ea-for-csf-real-stack r) real) + (inst movss (ea-for-csf-imag-stack r) imag))))) + +(define-vop (make-complex-double-float) + (:translate complex) + (:args (real :scs (double-reg) :to :save) + (imag :scs (double-reg) :to :save)) + (:arg-types double-float double-float) + (:results (r :scs (complex-double-reg) :from (:argument 0) + :load-if (not (sc-is r complex-double-stack)))) + (:result-types complex-double-float) + (:temporary (:sc complex-double-reg) temp) + (:note _N"inline complex double-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-double-reg + ;; x = a + b*i = b|a + (inst movsd temp real) ; temp = ?|a + (inst unpcklpd temp imag) ; temp = b|a + (inst movapd r temp)) + (complex-double-stack + (inst movsd (ea-for-cdf-real-stack r) real) + (inst movsd (ea-for-cdf-imag-stack r) imag))))) + +(define-vop (realpart/complex-single-float) + (:translate realpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg))) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:temporary (:sc single-reg) temp) + (:policy :fast-safe) + (:note _N"complex float realpart") + (:generator 3 + (sc-case x + (complex-single-reg + (cond ((location= r x) + (inst xorps temp temp) ; temp = 0|0|0|0 + (inst movss temp x) ; temp = 0|0|0|x + (inst movss r temp)) ; r = temp + (t + (inst xorps r r) ; temp = 0|0|0|0 + (inst movss r x)))) ; r = 0|0|0|x + (complex-single-stack + (inst movss r (ea-for-csf-real-stack x))) + (descriptor-reg + (inst movss r (ea-for-csf-real-desc x)))))) + +(define-vop (realpart/complex-double-float) + (:translate realpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg))) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:temporary (:sc double-reg) temp) + (:policy :fast-safe) + (:note "complex float realpart") + (:generator 3 + (sc-case x + (complex-double-reg + (cond ((location= r x) + (inst xorpd temp temp) ; temp = 0|0 + (inst movsd temp x) ; temp = 0|x + (inst movsd r temp)) ; r = temp + (t + (inst xorpd r r) ; r = 0|0 + (inst movsd r x)))) ; r = 0|x + (complex-double-stack + (inst movsd r (ea-for-cdf-real-stack x))) + (descriptor-reg + (inst movsd r (ea-for-cdf-real-desc x)))))) + +(define-vop (imagpart/complex-single-float) + (:translate imagpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg))) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:temporary (:sc complex-single-reg) temp) + (:policy :fast-safe) + (:note _N"complex float imagpart") + (:generator 3 + (sc-case x + (complex-single-reg + ;; x = a+b*i = b|a + ;; Get the imag part to the low part of temp. We don't care about + ;; the other parts of r. + (inst movaps temp x) ; temp = u|u|b|a + (inst shufps temp x #b01) ; temp = a|a|a|b + (inst xorps r r) ; r = 0|0|0|0 + (inst movss r temp) ; r = 0|0|0|b + ) + (complex-single-stack + (inst movss r (ea-for-csf-imag-stack x))) + (descriptor-reg + (inst movss r (ea-for-csf-imag-desc x)))))) + +(define-vop (imagpart/complex-double-float) + (:translate imagpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg))) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:temporary (:sc complex-double-reg) temp) + (:policy :fast-safe) + (:note _N"complex float imagpart") + (:generator 3 + (sc-case x + (complex-double-reg + (cond ((location= r x) + (inst xorpd temp temp) ; temp = 0|0 + (inst movhlps temp x) ; temp = 0|b + (inst movsd r temp)) ; r = temp + (t + (inst xorpd r r) ; r = 0|0 + (inst movhlps r x)))) ; r = 0|b + (complex-double-stack + (inst movsd r (ea-for-cdf-imag-stack x))) + (descriptor-reg + (inst movsd r (ea-for-cdf-imag-desc x)))))) + +;;; A hack dummy VOP to bias the representation selection of its +;;; argument towards a FP register which can help avoid consing at +;;; inappropriate locations. + +(defknown double-float-reg-bias (double-float) (values)) +;;; +(define-vop (double-float-reg-bias) + (:translate double-float-reg-bias) + (:args (x :scs (double-reg double-stack) :load-if nil)) + (:arg-types double-float) + (:policy :fast-safe) + (:note _N"inline dummy FP register bias") + (:ignore x) + (:generator 0)) + +(defknown single-float-reg-bias (single-float) (values)) +;;; +(define-vop (single-float-reg-bias) + (:translate single-float-reg-bias) + (:args (x :scs (single-reg single-stack) :load-if nil)) + (:arg-types single-float) + (:policy :fast-safe) + (:note _N"inline dummy FP register bias") + (:ignore x) + (:generator 0)) + +;;; Support for double-double floats + +#+double-double +(progn + +(defun double-double-reg-hi-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*) + :offset (tn-offset x))) + +(defun double-double-reg-lo-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*) + :offset (1+ (tn-offset x)))) + +(define-move-function (load-double-double 4) (vop x y) + ((double-double-stack) (double-double-reg)) + (let ((real-tn (double-double-reg-hi-tn y))) + (inst movsd real-tn (ea-for-cdf-real-stack x))) + (let ((imag-tn (double-double-reg-lo-tn y))) + (inst movsd imag-tn (ea-for-cdf-imag-stack x)))) + +(define-move-function (store-double-double 4) (vop x y) + ((double-double-reg) (double-double-stack)) + (let ((real-tn (double-double-reg-hi-tn x))) + (inst movsd (ea-for-cdf-real-stack y) real-tn)) + (let ((imag-tn (double-double-reg-lo-tn x))) + (inst movsd (ea-for-cdf-imag-stack y) imag-tn))) + +;;; Double-double float register to register moves + +(define-vop (double-double-move) + (:args (x :scs (double-double-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (double-double-reg) :load-if (not (location= x y)))) + (:note _N"double-double float move") + (:generator 0 + (unless (location= x y) + ;; Note the double-float-regs are aligned to every second + ;; float register so there is not need to worry about overlap. + (let ((x-hi (double-double-reg-hi-tn x)) + (y-hi (double-double-reg-hi-tn y))) + (inst movsd y-hi x-hi) + (let ((x-lo (double-double-reg-lo-tn x)) + (y-lo (double-double-reg-lo-tn y))) + (inst movsd y-lo x-lo)))))) +;;; +(define-move-vop double-double-move :move + (double-double-reg) (double-double-reg)) + +;;; Move from a complex float to a descriptor register allocating a +;;; new complex float object in the process. + +(define-vop (move-from-double-double) + (:args (x :scs (double-double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note _N"double double float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y vm:double-double-float-type + vm:double-double-float-size node) + (let ((real-tn (double-double-reg-hi-tn x))) + (inst movsd (ea-for-cdf-real-desc y) real-tn)) + (let ((imag-tn (double-double-reg-lo-tn x))) + (inst movsd (ea-for-cdf-imag-desc y) imag-tn))))) +;;; +(define-move-vop move-from-double-double :move + (double-double-reg) (descriptor-reg)) + +;;; Move from a descriptor to a double-double float register + +(define-vop (move-to-double-double) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (double-double-reg))) + (:note _N"pointer to double-double-float coercion") + (:generator 2 + (let ((real-tn (double-double-reg-hi-tn y))) + (inst movsd real-tn (ea-for-cdf-real-desc x))) + (let ((imag-tn (double-double-reg-lo-tn y))) + (inst movsd imag-tn (ea-for-cdf-imag-desc x))))) + +(define-move-vop move-to-double-double :move + (descriptor-reg) (double-double-reg)) + +;;; double-double float move-argument vop + +(define-vop (move-double-double-float-argument) + (:args (x :scs (double-double-reg) :target y) + (fp :scs (any-reg) :load-if (not (sc-is y double-double-reg)))) + (:results (y)) + (:note _N"double double-float argument move") + (:generator 2 + (sc-case y + (double-double-reg + (unless (location= x y) + (let ((x-real (double-double-reg-hi-tn x)) + (y-real (double-double-reg-hi-tn y))) + (inst movsd y-real x-real)) + (let ((x-imag (double-double-reg-lo-tn x)) + (y-imag (double-double-reg-lo-tn y))) + (inst movsd y-imag x-imag)))) + (double-double-stack + (let ((hi-tn (double-double-reg-hi-tn x))) + (inst movsd (ea-for-cdf-real-stack y fp) hi-tn)) + (let ((lo-tn (double-double-reg-lo-tn x))) + (inst movsd (ea-for-cdf-imag-stack y fp) lo-tn)))))) + +(define-move-vop move-double-double-float-argument :move-argument + (double-double-reg descriptor-reg) (double-double-reg)) + + +(define-vop (move-to-complex-double-double) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (complex-double-double-reg))) + (:note _N"pointer to complex float coercion") + (:generator 2 + (let ((real-tn (complex-double-double-reg-real-hi-tn y))) + (inst movsd real-tn (ea-for-cddf-real-hi-desc x))) + (let ((real-tn (complex-double-double-reg-real-lo-tn y))) + (inst movsd real-tn (ea-for-cddf-real-lo-desc x))) + (let ((imag-tn (complex-double-double-reg-imag-hi-tn y))) + (inst movsd imag-tn (ea-for-cddf-imag-hi-desc x))) + (let ((imag-tn (complex-double-double-reg-imag-lo-tn y))) + (inst movsd imag-tn (ea-for-cddf-imag-lo-desc x))))) + +(define-move-vop move-to-complex-double-double :move + (descriptor-reg) (complex-double-double-reg)) + + +(define-vop (make/double-double-float) + (:args (hi :scs (double-reg) :target r + :load-if (not (location= hi r))) + (lo :scs (double-reg) :to :save)) + (:results (r :scs (double-double-reg) :from (:argument 0) + :load-if (not (sc-is r double-double-stack)))) + (:arg-types double-float double-float) + (:result-types double-double-float) + (:translate kernel::%make-double-double-float) + (:note _N"inline double-double-float creation") + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case r + (double-double-reg + (let ((r-real (double-double-reg-hi-tn r))) + (unless (location= hi r-real) + (inst movsd r-real hi))) + (let ((r-imag (double-double-reg-lo-tn r))) + (unless (location= lo r-imag) + (inst movsd r-imag lo)))) + (double-double-stack + (unless (location= hi r) + (inst movsd (ea-for-cdf-real-stack r) hi)) + (inst movsd (ea-for-cdf-imag-stack r) lo))))) + +(define-vop (double-double-value) + (:args (x :target r)) + (:results (r)) + (:variant-vars offset) + (:policy :fast-safe) + (:generator 3 + (cond ((sc-is x double-double-reg) + (let ((value-tn + (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg *backend*) + :offset (+ offset (tn-offset x))))) + (unless (location= value-tn r) + (inst movsd r value-tn)))) + ((sc-is r double-reg) + (let ((ea (sc-case x + (double-double-stack + (ecase offset + (0 (ea-for-cdf-real-stack x)) + (1 (ea-for-cdf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-cdf-real-desc x)) + (1 (ea-for-cdf-imag-desc x))))))) + (inst movsd r ea))) + (t (error "double-double-value VOP failure"))))) + + +(define-vop (hi/double-double-value double-double-value) + (:translate kernel::double-double-hi) + (:args (x :scs (double-double-reg double-double-stack descriptor-reg) + :target r)) + (:arg-types double-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note _N"double-double high part") + (:variant 0)) + +(define-vop (lo/double-double-value double-double-value) + (:translate kernel::double-double-lo) + (:args (x :scs (double-double-reg double-double-stack descriptor-reg) + :target r)) + (:arg-types double-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note _N"double-double low part") + (:variant 1)) + +(define-vop (make-complex-double-double-float) + (:translate complex) + (:args (real :scs (double-double-reg) :target r + :load-if (not (location= real r)) + ) + (imag :scs (double-double-reg) :to :save)) + (:arg-types double-double-float double-double-float) + (:results (r :scs (complex-double-double-reg) :from (:argument 0) + :load-if (not (sc-is r complex-double-double-stack)))) + (:result-types complex-double-double-float) + (:note _N"inline complex double-double-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-double-double-reg + (let ((r-real (complex-double-double-reg-real-hi-tn r)) + (a-real (double-double-reg-hi-tn real))) + (unless (location= a-real r-real) + (inst movsd r-real a-real))) + (let ((r-real (complex-double-double-reg-real-lo-tn r)) + (a-real (double-double-reg-lo-tn real))) + (unless (location= a-real r-real) + (inst movsd r-real a-real))) + (let ((r-imag (complex-double-double-reg-imag-hi-tn r)) + (a-imag (double-double-reg-hi-tn imag))) + (unless (location= a-imag r-imag) + (inst movsd r-imag a-imag))) + (let ((r-imag (complex-double-double-reg-imag-lo-tn r)) + (a-imag (double-double-reg-lo-tn imag))) + (unless (location= a-imag r-imag) + (inst movsd r-imag a-imag)))) + (complex-double-double-stack + (unless (location= real r) + (inst movsd (ea-for-cddf-real-hi-stack r) real)) + (let ((real-lo (double-double-reg-lo-tn real))) + (inst movsd (ea-for-cddf-real-lo-stack r) real-lo)) + (let ((imag-val (double-double-reg-hi-tn imag))) + (inst movsd (ea-for-cddf-imag-hi-stack r) imag-val)) + (let ((imag-val (double-double-reg-lo-tn imag))) + (inst movsd (ea-for-cddf-imag-lo-stack r) imag-val)))))) + +(define-vop (complex-double-double-float-value) + (:args (x :scs (complex-double-double-reg descriptor-reg) :target r + :load-if (not (sc-is x complex-double-double-stack)))) + (:arg-types complex-double-double-float) + (:results (r :scs (double-double-reg))) + (:result-types double-double-float) + (:variant-vars slot) + (:policy :fast-safe) + (:generator 3 + (sc-case x + (complex-double-double-reg + (let ((value-tn (ecase slot + (:real (complex-double-double-reg-real-hi-tn x)) + (:imag (complex-double-double-reg-imag-hi-tn x)))) + (r-hi (double-double-reg-hi-tn r))) + (unless (location= value-tn r-hi) + (inst movsd r-hi value-tn))) + (let ((value-tn (ecase slot + (:real (complex-double-double-reg-real-lo-tn x)) + (:imag (complex-double-double-reg-imag-lo-tn x)))) + (r-lo (double-double-reg-lo-tn r))) + (unless (location= value-tn r-lo) + (inst movsd r-lo value-tn)))) + (complex-double-double-stack + (let ((r-hi (double-double-reg-hi-tn r))) + (inst movsd r-hi (ecase slot + (:real (ea-for-cddf-real-hi-stack x)) + (:imag (ea-for-cddf-imag-hi-stack x))))) + (let ((r-lo (double-double-reg-lo-tn r))) + (inst movsd r-lo (ecase slot + (:real (ea-for-cddf-real-lo-stack x)) + (:imag (ea-for-cddf-imag-lo-stack x)))))) + (descriptor-reg + (let ((r-hi (double-double-reg-hi-tn r))) + (inst movsd r-hi (ecase slot + (:real (ea-for-cddf-real-hi-desc x)) + (:imag (ea-for-cddf-imag-hi-desc x))))) + (let ((r-lo (double-double-reg-lo-tn r))) + (inst movsd r-lo (ecase slot + (:real (ea-for-cddf-real-lo-desc x)) + (:imag (ea-for-cddf-imag-lo-desc x))))))))) + +(define-vop (realpart/complex-double-double-float complex-double-double-float-value) + (:translate realpart) + (:note _N"complex float realpart") + (:variant :real)) + +(define-vop (imagpart/complex-double-double-float complex-double-double-float-value) + (:translate imagpart) + (:note _N"complex float imagpart") + (:variant :imag)) + +); progn + + +;;; Vops for complex arithmetic. These are usually much faster than +;;; the compiler-generated code using deftransforms. + +;; Negate a complex +(macrolet + ((negate-complex (type shift xor amount) + (let ((name (symbolicate "%NEGATE/COMPLEX-" type "-FLOAT")) + (sc-type (symbolicate "COMPLEX-" type "-FLOAT")) + (sc (symbolicate "COMPLEX-" type "-REG"))) + `(define-vop (,name) + (:translate %negate) + (:args (x :scs (,sc) :target r)) + (:arg-types ,sc-type) + (:results (r :scs (,sc))) + (:result-types ,sc-type) + (:policy :fast-safe) + (:temporary (:scs (,sc)) t0) + (:generator 1 + (inst pcmpeqd t0 t0) ; all ones + (inst ,shift t0 ,amount) ; #x8000...0000 + (unless (location= x r) + (inst movaps r x)) + (inst ,xor r t0)))))) + (negate-complex single pslld xorps 31) + (negate-complex double psllq xorpd 63)) + +;; Convert various number types to complex double-floats +(macrolet + ((convert-complex (trans op to from) + (let ((name (symbolicate to "/" from)) + (from-sc (symbolicate from "-REG")) + (from-type (symbolicate from "-FLOAT")) + (to-sc (symbolicate to "-REG")) + (to-type (symbolicate to "-FLOAT"))) + `(define-vop (,name) + (:translate ,trans) + (:args (x :scs (,from-sc) :target r)) + (:arg-types ,from-type) + (:results (r :scs (,to-sc))) + (:result-types ,to-type) + (:policy :fast-safe) + (:generator 1 + ;; NOTE: We don't have 128-bit aligned objects, so we + ;; can't use the stack or descriptors here. + (inst ,op r x)))))) + (convert-complex %complex-double-float cvtps2pd complex-double complex-single) + (convert-complex %complex-single-float cvtpd2ps complex-single complex-double)) + +(macrolet + ((convert-complex (trans op base-ea to from movinst) + (let ((name (symbolicate to "/" from)) + (from-sc (symbolicate from "-REG")) + (from-sc-stack (symbolicate from "-STACK")) + (from-type (symbolicate from "-FLOAT")) + (to-sc (symbolicate to "-REG")) + (to-type (symbolicate to "-FLOAT"))) + `(define-vop (,name) + (:translate ,trans) + (:args (x :scs (,from-sc ,from-sc-stack descriptor-reg) + :target r)) + (:arg-types ,from-type) + (:results (r :scs (,to-sc))) + (:result-types ,to-type) + (:temporary (:sc ,to-sc) temp) + (:policy :fast-safe) + (:generator 1 + (sc-case x + (,from-sc + ;; Need to make sure the imaginary part is zero + (cond ((location= x r) + (inst xorps temp temp) + (inst ,op temp x) + (inst ,movinst r temp)) + (t + (inst xorps r r) + (inst ,op r x)))) + (,from-sc-stack + (inst xorps r r) + (inst ,op r (,(symbolicate "EA-FOR-" base-ea "-STACK") x))) + (descriptor-reg + (inst xorps r r) + (inst ,op r (,(symbolicate "EA-FOR-" base-ea "-DESC") x))))))))) + (convert-complex %complex-double-float cvtss2sd sf complex-double single movapd) + (convert-complex %complex-single-float cvtsd2ss df complex-single double movaps)) + +;; Add and subtract for two complex arguments +(macrolet + ((generate (movinst opinst commutative) + `(cond + ((location= x r) + (inst ,opinst x y)) + ((and ,commutative (location= y r)) + (inst ,opinst y x)) + ((not (location= r y)) + (inst ,movinst r x) + (inst ,opinst r y)) + (t + (inst ,movinst tmp x) + (inst ,opinst tmp y) + (inst ,movinst r tmp)))) + (complex-add/sub (op inst float-type cost &optional commutative) + (let* ((vop-name (symbolicate (symbol-name op) "/COMPLEX-" float-type "-FLOAT")) + (c-type (symbolicate "COMPLEX-" float-type "-FLOAT")) + (complex-reg (symbolicate "COMPLEX-" float-type "-REG"))) + ;; Note: It would probably improve things if we could use + ;; memory operands, but we can't because the instructions + ;; assumed 128-bit alignment, which we can't guarantee. + `(define-vop (,vop-name) + (:args (x :scs (,complex-reg) :target r) + (y :scs (,complex-reg))) + (:results (r :scs (,complex-reg))) + (:arg-types ,c-type ,c-type) + (:result-types ,c-type) + (:policy :fast-safe) + (:note _N"inline complex float arithmetic") + (:translate ,op) + (:temporary (:sc ,complex-reg) tmp) + (:generator ,cost + (generate movaps ,inst ,commutative)))))) + (complex-add/sub + addps single 1 t) + (complex-add/sub + addpd double 1 t) + (complex-add/sub - subps single 1) + (complex-add/sub - subpd double 1)) + +;; Add and subtract a complex and a float +(macrolet + ((generate (movinst opinst) + `(cond + ((location= x r) + (inst ,opinst x rtmp)) + ((not (location= r rtmp)) + (inst ,movinst r x) + (inst ,opinst r rtmp)) + (t + (inst ,movinst tmp x) + (inst ,opinst tmp rtmp) + (inst ,movinst r tmp)))) + (complex-op-float (size op fop base-ea cost) + (let ((vop-name (symbolicate "COMPLEX-" size "-FLOAT-" + op + "-" size "-FLOAT")) + (complex-reg (symbolicate "COMPLEX-" size "-REG")) + (real-reg (symbolicate size "-REG")) + (c-type (symbolicate "COMPLEX-" size "-FLOAT")) + (r-type (symbolicate size "-FLOAT")) + (r-stack (symbolicate size "-STACK")) + (ea-stack (symbolicate "EA-FOR-" base-ea "-STACK")) + (ea-desc (symbolicate "EA-FOR-" base-ea "-DESC")) + (loadinst (ecase size + (single 'movss) + (double 'movsd))) + (movinst (ecase size + (single 'movaps) + (double 'movapd)))) + `(define-vop (,vop-name) + (:args (x :scs (,complex-reg)) + (y :scs (,real-reg ,r-stack descriptor-reg))) + (:results (r :scs (,complex-reg))) + (:arg-types ,c-type ,r-type) + (:result-types ,c-type) + (:policy :fast-safe) + (:note _N"inline complex float/float arithmetic") + (:translate ,op) + (:temporary (:sc ,complex-reg) tmp) + (:temporary (:sc ,real-reg) rtmp) + (:generator ,cost + ;; Clear out high and low parts of temp, which will + ;; eventually hold y. + (inst xorpd rtmp rtmp) + (sc-case y + (,real-reg + (inst ,loadinst rtmp y) + (generate ,movinst ,fop)) + (,r-stack + (let ((ea (,ea-stack y))) + (inst ,loadinst rtmp ea) + (generate ,movinst ,fop))) + (descriptor-reg + (let ((ea (,ea-desc y))) + (inst ,loadinst rtmp ea) + (generate ,movinst ,fop))))))))) + (complex-op-float single + addps sf 1) + (complex-op-float single - subps sf 1) + (complex-op-float double + addpd df 1) + (complex-op-float double - subpd df 1)) + +;; Add a float and a complex +(macrolet + ((generate (movinst opinst) + `(cond + ((location= x r) + (inst ,opinst x rtmp)) + ((not (location= r y)) + (inst ,movinst r x) + (inst ,opinst r rtmp)) + (t + (inst ,movinst tmp x) + (inst ,opinst tmp rtmp) + (inst ,movinst r tmp)))) + (complex-op-float (size op fop base-ea cost) + (let ((vop-name (symbolicate size "-FLOAT-" + op + "-" "COMPLEX-" size "-FLOAT")) + (complex-reg (symbolicate "COMPLEX-" size "-REG")) + (real-reg (symbolicate size "-REG")) + (c-type (symbolicate "COMPLEX-" size "-FLOAT")) + (r-type (symbolicate size "-FLOAT")) + (r-stack (symbolicate size "-STACK")) + (ea-stack (symbolicate "EA-FOR-" base-ea "-STACK")) + (ea-desc (symbolicate "EA-FOR-" base-ea "-DESC")) + (loadinst (ecase size + (single 'movss) + (double 'movsd))) + (movinst (ecase size + (single 'movaps) + (double 'movapd)))) + `(define-vop (,vop-name) + (:args (y :scs (,real-reg ,r-stack descriptor-reg)) + (x :scs (,complex-reg))) + (:results (r :scs (,complex-reg))) + (:arg-types ,r-type ,c-type) + (:result-types ,c-type) + (:policy :fast-safe) + (:note _N"inline complex float/float arithmetic") + (:translate ,op) + (:temporary (:sc ,complex-reg) tmp) + (:temporary (:sc ,real-reg) rtmp) + (:generator ,cost + (inst xorpd rtmp rtmp) + (sc-case y + (,real-reg + (inst ,loadinst rtmp y) + (generate ,movinst ,fop)) + (,r-stack + (let ((ea (,ea-stack y))) + (inst ,loadinst rtmp ea) + (generate ,movinst ,fop))) + (descriptor-reg + (let ((ea (,ea-desc y))) + (inst ,loadinst rtmp ea) + (generate ,movinst ,fop))))))))) + (complex-op-float single + addps sf 1) + (complex-op-float double + addpd df 1)) + +;; Multiply a complex by a float or a float by a complex. +(macrolet + ((complex-*-float (float-type fmul copy cost) + (let* ((vop-name (symbolicate "COMPLEX-" + float-type + "-FLOAT-*-" + float-type + "-FLOAT")) + (vop-name-r (symbolicate float-type + "-FLOAT-*-COMPLEX-" + float-type + "-FLOAT")) + (complex-sc-type (symbolicate "COMPLEX-" float-type "-REG")) + (real-sc-type (symbolicate float-type "-REG")) + (c-type (symbolicate "COMPLEX-" float-type "-FLOAT")) + (r-type (symbolicate float-type "-FLOAT"))) + `(progn + ;; Complex * float + (define-vop (,vop-name) + (:args (x :scs (,complex-sc-type)) + (y :scs (,real-sc-type))) + (:results (r :scs (,complex-sc-type))) + (:arg-types ,c-type ,r-type) + (:result-types ,c-type) + (:policy :fast-safe) + (:note _N"inline complex float arithmetic") + (:translate *) + (:temporary (:scs (,complex-sc-type)) t0) + (:generator ,cost + (inst movaps t0 y) ; t0 = y + (inst ,copy t0 t0) ; t0 = y|y + (unless (location= x r) + (inst movaps r x)) ; r = xi|xr + (inst ,fmul r t0))) + (define-vop (,vop-name-r) + (:args (x :scs (,real-sc-type)) + (y :scs (,complex-sc-type))) + (:results (r :scs (,complex-sc-type))) + (:arg-types ,r-type ,c-type) + (:result-types ,c-type) + (:policy :fast-safe) + (:note _N"inline complex float arithmetic") + (:translate *) + (:temporary (:scs (,complex-sc-type)) t0) + (:generator ,cost + (inst movaps t0 x) ; t0 = 0|x or 0|0|0|x + (inst ,copy t0 t0) ; t0 = x|x or 0|0|x|x + (unless (location= y r) + (inst movaps r y)) ; r = yi|yr or 0|0|yi|yr + (inst ,fmul r t0))))))) + (complex-*-float single mulps unpcklps 4) + (complex-*-float double mulpd unpcklpd 4)) + +;; Divide a complex by a real +(define-vop (complex-double-float-/-double-float) + (:args (x :scs (complex-double-reg)) (y :scs (double-reg))) + (:results (r :scs (complex-double-reg))) + (:arg-types complex-double-float double-float) + (:result-types complex-double-float) + (:policy :fast-safe) + (:note _N"inline complex float arithmetic") + (:translate /) + (:temporary (:sc complex-double-reg) t0) + (:generator 4 + (inst movaps t0 y) ; t0 = u|y + (inst unpcklpd t0 t0) ; t0 = y|y + (unless (location= x r) + (inst movaps r x)) ; r = xi|xr + (inst divpd r t0))) + +(define-vop (complex-single-float-/-single-float) + (:args (x :scs (complex-single-reg)) (y :scs (single-reg))) + (:results (r :scs (complex-single-reg))) + (:arg-types complex-single-float single-float) + (:result-types complex-single-float) + (:policy :fast-safe) + (:note _N"inline complex float arithmetic") + (:translate /) + (:temporary (:sc complex-single-reg) t0 t1) + (:generator 5 + ;; The upper parts of x may contain junk and dividing that by y + ;; may cause spurious signals. Thus, copy the complex number to + ;; the high part. + (inst movaps t0 y) ; t0 = u|u|u|y + (inst shufps t0 t0 0) ; t0 = y|y|y|y + (inst movaps t1 x) ; t1 = u|u|xi|xr + (inst movlhps t1 t1) ; t1 = xi|xr|xi|xr + (inst divps t1 t0) + (inst xorps t0 t0) ; t0 = 0|0|0|0 + (inst movaps r t1) + (inst movlhps r t0))) + +(define-vop (sse3-*/complex-double-float) + (:translate *) + (:args (x :scs (complex-double-reg)) + (y :scs (complex-double-reg))) + (:arg-types complex-double-float complex-double-float) + (:results (r :scs (complex-double-reg))) + (:result-types complex-double-float) + (:policy :fast-safe) + (:temporary (:scs (complex-double-reg)) t1 t2) + (:guard (backend-featurep :sse3)) + (:generator 8 + ;; Basic algorithm from the paper "The Microarchitecture of the + ;; Intel Pentium 4 Processor on 90nm Technololgy" + ;; + ;; This requires SSSE3 instructions (addsubpd, movddup). + ;; + ;; x = a + b*i. In sse2 reg we have: b|a + ;; y = c + d*i. In sse2 reg we have: d|c + (inst movddup t1 x) ; t1 = a|a + (inst mulpd t1 y) ; t1 = a*d|a*c + (inst movapd t2 x) ; t2 = b|a + (inst unpckhpd t2 t2) ; t2 = b|b + (inst mulpd t2 y) ; t2 = b*d|b*c + (inst shufpd t2 t2 1) ; t2 = b*c|b*d + (inst addsubpd t1 t2) ; t2 = a*d+b*c|a*c-b*d + (inst movapd r t1))) + +(define-vop (*/complex-double-float) + (:translate *) + (:args (x :scs (complex-double-reg)) + (y :scs (complex-double-reg))) + (:arg-types complex-double-float complex-double-float) + (:results (r :scs (complex-double-reg))) + (:result-types complex-double-float) + (:policy :fast-safe) + (:temporary (:scs (complex-double-reg)) t0 t1 t2) + (:temporary (:scs (unsigned-reg)) tmp) + (:generator 13 + ;; Basic algorithm from the paper "The Microarchitecture of the + ;; Intel Pentium 4 Processor on 90nm Technololgy" + + ;; x = a+b*i = b|a + ;; y = c+d*i = d|c + ;; r = a*c-b*d + i*(a*d+b*c) + (inst movapd t1 y) ; t1 = d|c + (inst movapd t2 y) ; t2 = d|c + (inst unpcklpd t1 t1) ; t1 = c|c + (inst unpckhpd t2 t2) ; t2 = d|d + (inst mulpd t1 x) ; t1 = b*c|a*c + (inst mulpd t2 x) ; t2 = b*d|a*d + (inst shufpd t2 t2 1) ; t2 = a*d|b*d + (inst mov tmp #x80000000) + (inst movd t0 tmp) ; t0 = 0|0|0|#x80000000 + (inst psllq t0 32) ; t0 = 0|#x80000000,00000000 + (inst xorpd t2 t0) ; t2 = a*d|-b*d + (inst addpd t2 t1) ; t2 = a*d+b*c | a*c-b*d + (inst movapd r t2))) + + +(define-vop (*/complex-single-float) + (:translate *) + (:args (x :scs (complex-single-reg)) + (y :scs (complex-single-reg))) + (:arg-types complex-single-float complex-single-float) + (:results (r :scs (complex-single-reg))) + (:result-types complex-single-float) + (:policy :fast-safe) + (:temporary (:scs (complex-single-reg)) t0 t1 t2) + (:temporary (:scs (unsigned-reg)) tmp) + (:generator 14 + ;; Basic algorithm from the paper "The Microarchitecture of the + ;; Intel Pentium 4 Processor on 90nm Technololgy" + + ;; x = a+b*i = b|a + ;; y = c+d*i = d|c + ;; r = a*c-b*d + i*(a*d+b*c) + (inst movaps t1 y) ; t1 = u|u|d|c + (inst movaps t2 y) ; t2 = u|u|d|c + (inst shufps t1 t1 #b00000000) ; t1 = c|c|c|c + (inst shufps t2 t2 #b01010101) ; t2 = d|d|d|d + (inst mulps t1 x) ; t1 = b*c|a*c + (inst mulps t2 x) ; t2 = b*d|a*d + (inst shufps t2 t2 1) ; t2 = a*d|b*d + (inst mov tmp #x80000000) + (inst movd t0 tmp) ; t0 = 0|0|0|#x80000000 + (inst xorps t2 t0) ; t2 = a*d|-b*d + (inst addps t2 t1) ; t2 = a*d+b*c | a*c-b*d + (inst xorps t1 t1) ; t1 = 0|0|0|0 + (inst movaps r t2) + (inst movlhps r t1))) + +;; Conjugate +(define-vop (conjugate/complex-double-float) + (:translate conjugate) + (:args (z :scs (complex-double-reg))) + (:arg-types complex-double-float) + (:results (r :scs (complex-double-reg))) + (:result-types complex-double-float) + (:policy :fast-safe) + (:temporary (:scs (complex-double-reg)) ztmp) + (:temporary (:scs (unsigned-reg)) tmp) + (:generator 2 + (inst mov tmp #x80000000) + (inst movd ztmp tmp) + (inst psllq ztmp 32) ; ztmp = 0|#x80000000,00000000 + (inst shufpd ztmp ztmp 1) ; ztmp = #x80000000,00000000|0 + (inst xorpd ztmp z) ; ztmp = -xi|xi + (inst movapd r ztmp))) + +(define-vop (conjugate/complex-single-float) + (:translate conjugate) + (:args (z :scs (complex-single-reg))) + (:arg-types complex-single-float) + (:results (r :scs (complex-single-reg))) + (:result-types complex-single-float) + (:policy :fast-safe) + (:temporary (:scs (complex-single-reg)) ztmp) + (:temporary (:scs (unsigned-reg)) tmp) + (:generator 2 + (inst mov tmp #x80000000) + (inst movd ztmp tmp) + (inst psllq ztmp 32) ; ztmp = #x80000000|0 + (inst xorps ztmp z) ; ztmp = -xi|xr + (inst movaps r ztmp)))
===================================== src/compiler/amd64/macros.lisp ===================================== @@ -130,10 +130,10 @@ (n-offset offset)) (ecase (backend-byte-order *target-backend*) (:little-endian - `(inst mov ,n-target + `(inst movzx ,n-target (make-ea :byte :base ,n-source :disp ,n-offset))) (:big-endian - `(inst mov ,n-target + `(inst movzx ,n-target (make-ea :byte :base ,n-source :disp (+ ,n-offset 3)))))))
(defmacro load-foreign-data-symbol (reg name )
===================================== src/compiler/amd64/vm.lisp ===================================== @@ -268,8 +268,7 @@
;; Non-Descriptor characters (base-char-reg registers - :locations #.byte-regs - :reserve-locations (#.ah-offset #.al-offset) + :locations #.dword-regs :constant-scs (immediate) :save-p t :alternate-scs (base-char-stack)) @@ -385,12 +384,13 @@
(eval-when (compile load eval)
-(defconstant byte-sc-names '(base-char-reg byte-reg base-char-stack)) +(defconstant byte-sc-names '(byte-reg)) (defconstant word-sc-names '(word-reg)) (defconstant dword-sc-names '(dword-reg)) (defconstant qword-sc-names '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack - signed-stack unsigned-stack sap-stack single-stack constant)) + signed-stack unsigned-stack sap-stack single-stack constant + base-char-reg base-char-stack))
;;; ;;; added by jrd. I guess the right thing to do is to treat floats
===================================== src/compiler/generic/objdef.lisp ===================================== @@ -123,13 +123,13 @@ single-float double-float #+long-float long-float - #+#.(c:target-featurep :double-double) + #+double-double double-double-float complex complex-single-float complex-double-float #+long-float complex-long-float - #+#.(c:target-featurep :double-double) + #+double-double complex-double-double-float
simple-array @@ -148,12 +148,12 @@ simple-array-single-float simple-array-double-float #+long-float simple-array-long-float - #+#.(c:target-featurep :double-double) + #+double-double simple-array-double-double-float simple-array-complex-single-float simple-array-complex-double-float #+long-float simple-array-complex-long-float - #+#.(c:target-featurep :double-double) + #+double-double simple-array-complex-double-double-float complex-string complex-bit-vector
===================================== src/tools/cross-scripts/cross-x86-amd64.lisp ===================================== @@ -206,6 +206,7 @@ ))
(in-package :vm) +(defvar *num-fixups* 0) (defun fixup-code-object (code offset fixup kind) (declare (type index offset)) (flet ((add-fixup (code offset)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/1459e4fdb40ba5bab7a3d73...