Raymond Toy pushed to branch rtoy-amd64-p1 at cmucl / cmucl

Commits:

8 changed files:

Changes:

  • src/assembly/amd64/assem-rtns.lisp
    ... ... @@ -64,7 +64,8 @@
    64 64
       (inst lea rdi (make-ea :qword :base rbx :disp (- word-bytes)))
    
    65 65
       (inst rep)
    
    66 66
       (inst movs :qword)
    
    67
    -
    
    67
    +  (inst cld)
    
    68
    +  
    
    68 69
       ;; Restore the count.
    
    69 70
       (inst mov rcx rdx)
    
    70 71
     
    
    ... ... @@ -159,6 +160,7 @@
    159 160
       (inst sub rsi word-bytes)
    
    160 161
       (inst rep)
    
    161 162
       (inst movs :qword)
    
    163
    +  (inst cld)
    
    162 164
     
    
    163 165
       ;; Load the register arguments carefully.
    
    164 166
       (loadw rdx rbp-tn -1)
    
    ... ... @@ -274,3 +276,23 @@
    274 276
     
    
    275 277
       (inst jmp (make-ea :byte :base block
    
    276 278
     		     :disp (* unwind-block-entry-pc-slot word-bytes))))
    
    279
    +
    
    280
    +#+assembler
    
    281
    +(define-assembly-routine (closure-tramp
    
    282
    +			  (:return-style :none))
    
    283
    +                         ()
    
    284
    +  (loadw rax-tn rax-tn fdefn-function-slot other-pointer-type)
    
    285
    +  (inst jmp (make-ea :qword :base rax-tn
    
    286
    +		     :disp (- (* closure-function-slot word-bytes)
    
    287
    +			      function-pointer-type))))
    
    288
    +
    
    289
    +#+assembler
    
    290
    +(define-assembly-routine (undefined-tramp
    
    291
    +			  (:return-style :none))
    
    292
    +                         ()
    
    293
    +  (let ((error (generate-error-code nil undefined-symbol-error
    
    294
    +				    (make-random-tn :kind :normal
    
    295
    +						    :sc (sc-or-lose 'descriptor-reg c::*backend*)
    
    296
    +						    :offset 0))))
    
    297
    +    (inst jmp error)
    
    298
    +    (inst ret)))

  • src/compiler/amd64/array.lisp
    ... ... @@ -1234,20 +1234,13 @@
    1234 1234
       (:args (object :scs (descriptor-reg))
    
    1235 1235
     	 (index :scs (unsigned-reg)))
    
    1236 1236
       (:arg-types simple-string positive-fixnum)
    
    1237
    -  (:temporary (:sc unsigned-reg ; byte-reg
    
    1238
    -		   :offset rax-offset ; al-offset
    
    1239
    -		   :target value
    
    1240
    -		   :from (:eval 0) :to (:result 0))
    
    1241
    -	      rax)
    
    1242
    -  (:ignore rax)
    
    1243 1237
       (:results (value :scs (base-char-reg)))
    
    1244 1238
       (:result-types base-char)
    
    1245 1239
       (:generator 5
    
    1246
    -    (inst mov al-tn
    
    1247
    -	  (make-ea :byte :base object :index index :scale 1
    
    1240
    +    (inst movzx value
    
    1241
    +	  (make-ea :word :base object :index index :scale 2
    
    1248 1242
     		   :disp (- (* vector-data-offset word-bytes)
    
    1249
    -			    other-pointer-type)))
    
    1250
    -    (move value al-tn)))
    
    1243
    +			    other-pointer-type)))))
    
    1251 1244
     
    
    1252 1245
     (define-vop (data-vector-ref-c/simple-string)
    
    1253 1246
       (:translate data-vector-ref)
    
    ... ... @@ -1255,18 +1248,13 @@
    1255 1248
       (:args (object :scs (descriptor-reg)))
    
    1256 1249
       (:info index)
    
    1257 1250
       (:arg-types simple-string (:constant (signed-byte 30)))
    
    1258
    -  (:temporary (:sc unsigned-reg :offset rax-offset :target value
    
    1259
    -		   :from (:eval 0) :to (:result 0))
    
    1260
    -	      rax)
    
    1261
    -  (:ignore rax)
    
    1262 1251
       (:results (value :scs (base-char-reg)))
    
    1263 1252
       (:result-types base-char)
    
    1264 1253
       (:generator 4
    
    1265
    -    (inst mov al-tn
    
    1266
    -	  (make-ea :byte :base object
    
    1267
    -		   :disp (- (+ (* vector-data-offset word-bytes) index)
    
    1268
    -			    other-pointer-type)))
    
    1269
    -    (move value al-tn)))
    
    1254
    +    (inst movzx value
    
    1255
    +	  (make-ea :word :base object
    
    1256
    +		   :disp (- (+ (* vector-data-offset word-bytes) (* 2 index))
    
    1257
    +			    other-pointer-type)))))
    
    1270 1258
     
    
    1271 1259
     
    
    1272 1260
     (define-vop (data-vector-set/simple-string)
    
    ... ... @@ -1276,14 +1264,18 @@
    1276 1264
     	 (index :scs (unsigned-reg) :to (:eval 0))
    
    1277 1265
     	 (value :scs (base-char-reg)))
    
    1278 1266
       (:arg-types simple-string positive-fixnum base-char)
    
    1279
    -  (:results (result :scs (base-char-reg)))
    
    1267
    +   (:temporary (:sc unsigned-reg :offset rax-offset :target result
    
    1268
    +		   :from (:argument 2) :to (:result 0))
    
    1269
    +	      rax)
    
    1270
    + (:results (result :scs (base-char-reg)))
    
    1280 1271
       (:result-types base-char)
    
    1281 1272
       (:generator 5 
    
    1282
    -    (inst mov (make-ea :byte :base object :index index :scale 1
    
    1273
    +    (move rax value)
    
    1274
    +    (inst mov (make-ea :word :base object :index index :scale 2
    
    1283 1275
     		       :disp (- (* vector-data-offset word-bytes)
    
    1284 1276
     				other-pointer-type))
    
    1285
    -	  value)
    
    1286
    -    (move result value)))
    
    1277
    +	  ax-tn)
    
    1278
    +    (move result rax)))
    
    1287 1279
     
    
    1288 1280
     
    
    1289 1281
     (define-vop (data-vector-set/simple-string-c)
    
    ... ... @@ -1293,14 +1285,19 @@
    1293 1285
     	 (value :scs (base-char-reg)))
    
    1294 1286
       (:info index)
    
    1295 1287
       (:arg-types simple-string (:constant (signed-byte 30)) base-char)
    
    1288
    +  (:temporary (:sc unsigned-reg :offset rax-offset :target result
    
    1289
    +		   :from (:argument 1) :to (:result 0))
    
    1290
    +	      rax)
    
    1296 1291
       (:results (result :scs (base-char-reg)))
    
    1297 1292
       (:result-types base-char)
    
    1298 1293
       (:generator 4
    
    1299
    -   (inst mov (make-ea :byte :base object
    
    1300
    -		      :disp (- (+ (* vector-data-offset word-bytes) index)
    
    1301
    -			       other-pointer-type))
    
    1302
    -	 value)
    
    1303
    -   (move result value)))
    
    1294
    +    (move rax value)
    
    1295
    +    (inst mov (make-ea :word :base object
    
    1296
    +		       :disp (- (+ (* vector-data-offset word-bytes)
    
    1297
    +				   (* 2 index))
    
    1298
    +				other-pointer-type))
    
    1299
    +	  ax-tn)
    
    1300
    +    (move result rax)))
    
    1304 1301
     
    
    1305 1302
     
    
    1306 1303
     ;;; signed-byte-8
    

  • src/compiler/amd64/char.lisp
    ... ... @@ -30,41 +30,31 @@
    30 30
     ;;; Move a tagged char to an untagged representation.
    
    31 31
     ;;;
    
    32 32
     (define-vop (move-to-base-char)
    
    33
    -  (:args (x :scs (any-reg control-stack) :target al))
    
    34
    -  (:temporary (:sc byte-reg :offset al-offset
    
    35
    -		   :from (:argument 0) :to (:eval 0)) al)
    
    36
    -  (:ignore al)
    
    37
    -  (:temporary (:sc byte-reg :offset ah-offset :target y
    
    38
    -		   :from (:argument 0) :to (:result 0)) ah)
    
    39
    -  (:results (y :scs (base-char-reg base-char-stack)))
    
    40
    -  (:note "character untagging")
    
    33
    +  (:args (x :scs (any-reg control-stack) :target y))
    
    34
    +  (:results (y :scs (base-char-reg)))
    
    35
    +  (:note _N"character untagging")
    
    41 36
       (:generator 1
    
    42
    -    (move rax-tn x)
    
    43
    -    (move y ah)))
    
    37
    +    (move y x)
    
    38
    +    (inst shr y type-bits)))
    
    44 39
     ;;;
    
    45 40
     (define-move-vop move-to-base-char :move
    
    46
    -  (any-reg control-stack) (base-char-reg base-char-stack))
    
    41
    +  (any-reg control-stack) (base-char-reg))
    
    47 42
     
    
    48 43
     
    
    49 44
     ;;; Move an untagged char to a tagged representation.
    
    50 45
     ;;;
    
    51 46
     (define-vop (move-from-base-char)
    
    52
    -  (:args (x :scs (base-char-reg base-char-stack) :target ah))
    
    53
    -  (:temporary (:sc byte-reg :offset al-offset :target y
    
    54
    -		   :from (:argument 0) :to (:result 0)) al)
    
    55
    -  (:temporary (:sc byte-reg :offset ah-offset
    
    56
    -		   :from (:argument 0) :to (:result 0)) ah)
    
    57
    -  (:results (y :scs (any-reg descriptor-reg control-stack)))
    
    58
    -  (:note "character tagging")
    
    47
    +  (:args (x :scs (base-char-reg base-char-stack) :target y))
    
    48
    +  (:results (y :scs (any-reg descriptor-reg)))
    
    49
    +  (:note _N"character tagging")
    
    59 50
       (:generator 1
    
    60
    -    (move ah x)				; maybe move char byte
    
    61
    -    (inst mov al base-char-type)	; #x86 to type bits
    
    62
    -    (inst and rax-tn #xffff)		; remove any junk bits
    
    63
    -    (move y rax-tn)))
    
    51
    +    (move y x)
    
    52
    +    (inst shl y type-bits)
    
    53
    +    (inst or y base-char-type)))
    
    64 54
     
    
    65 55
     ;;;
    
    66 56
     (define-move-vop move-from-base-char :move
    
    67
    -  (base-char-reg base-char-stack) (any-reg descriptor-reg control-stack))
    
    57
    +  (base-char-reg base-char-stack) (any-reg descriptor-reg))
    
    68 58
     
    
    69 59
     ;;; Move untagged base-char values.
    
    70 60
     ;;;
    
    ... ... @@ -74,7 +64,7 @@
    74 64
     	    :load-if (not (location= x y))))
    
    75 65
       (:results (y :scs (base-char-reg base-char-stack)
    
    76 66
     	       :load-if (not (location= x y))))
    
    77
    -  (:note "character move")
    
    67
    +  (:note _N"character move")
    
    78 68
       (:effects)
    
    79 69
       (:affected)
    
    80 70
       (:generator 0
    
    ... ... @@ -92,15 +82,13 @@
    92 82
     	 (fp :scs (any-reg)
    
    93 83
     	     :load-if (not (sc-is y base-char-reg))))
    
    94 84
       (:results (y))
    
    95
    -  (:note "character arg move")
    
    85
    +  (:note _N"character arg move")
    
    96 86
       (:generator 0
    
    97 87
         (sc-case y
    
    98 88
           (base-char-reg
    
    99 89
            (move y x))
    
    100 90
           (base-char-stack
    
    101
    -       (inst mov
    
    102
    -	     (make-ea :byte :base fp :disp (- (* (1+ (tn-offset y)) word-bytes)))
    
    103
    -	     x)))))
    
    91
    +       (storew x fp (- (1+ (tn-offset y))))))))
    
    104 92
     ;;;
    
    105 93
     (define-move-vop move-base-char-argument :move-argument
    
    106 94
       (any-reg base-char-reg) (base-char-reg))
    
    ... ... @@ -119,29 +107,22 @@
    119 107
     (define-vop (char-code)
    
    120 108
       (:translate char-code)
    
    121 109
       (:policy :fast-safe)
    
    122
    -  (:args (ch :scs (base-char-reg base-char-stack)))
    
    110
    +  (:args (ch :scs (base-char-reg base-char-stack) :target res))
    
    123 111
       (:arg-types base-char)
    
    124 112
       (:results (res :scs (unsigned-reg)))
    
    125 113
       (:result-types positive-fixnum)
    
    126 114
       (:generator 1
    
    127
    -	      ;; ah to dh are not addressable when a rex prefix is used
    
    128
    -	      ;; The high 32 bits of doubleword operands are
    
    129
    -	      ;; zero-extended to 64-bits.
    
    130
    -    (inst movzx (64-bit-to-32-bit-tn res) ch)))
    
    115
    +    (move res ch)))
    
    131 116
     
    
    132 117
     (define-vop (code-char)
    
    133 118
       (:translate code-char)
    
    134 119
       (:policy :fast-safe)
    
    135
    -  (:args (code :scs (unsigned-reg unsigned-stack) :target rax))
    
    120
    +  (:args (code :scs (unsigned-reg control-stack) :target res))
    
    136 121
       (:arg-types positive-fixnum)
    
    137
    -  (:temporary (:sc unsigned-reg :offset rax-offset :target res
    
    138
    -		   :from (:argument 0) :to (:result 0))
    
    139
    -	      rax)
    
    140 122
       (:results (res :scs (base-char-reg)))
    
    141 123
       (:result-types base-char)
    
    142 124
       (:generator 1
    
    143
    -    (move rax code)
    
    144
    -    (move res al-tn)))
    
    125
    +    (move res code)))
    
    145 126
     
    
    146 127
     
    
    147 128
     ;;; Comparison of base-chars.
    
    ... ... @@ -155,7 +136,7 @@
    155 136
       (:conditional)
    
    156 137
       (:info target not-p)
    
    157 138
       (:policy :fast-safe)
    
    158
    -  (:note "inline comparison")
    
    139
    +  (:note _N"inline comparison")
    
    159 140
       (:variant-vars condition not-condition)
    
    160 141
       (:generator 3
    
    161 142
         (inst cmp x y)
    
    ... ... @@ -179,7 +160,7 @@
    179 160
       (:conditional)
    
    180 161
       (:info target not-p y)
    
    181 162
       (:policy :fast-safe)
    
    182
    -  (:note "inline comparison")
    
    163
    +  (:note _N"inline comparison")
    
    183 164
       (:variant-vars condition not-condition)
    
    184 165
       (:generator 2
    
    185 166
         (inst cmp x (char-code y))
    

  • src/compiler/amd64/float-sse2.lisp
    1
    +;;; -*- Mode: LISP; Syntax: Common-Lisp; Base: 10; Package: x86 -*-
    
    2
    +;;;
    
    3
    +;;; **********************************************************************
    
    4
    +;;; This code was written as part of the CMU Common Lisp project at
    
    5
    +;;; Carnegie Mellon University, and has been placed in the public domain.
    
    6
    +;;; If you want to use this code or any part of CMU Common Lisp, please contact
    
    7
    +;;; Scott Fahlman or slisp-group@cs.cmu.edu.
    
    8
    +;;;
    
    9
    +(ext:file-comment
    
    10
    +  "$Header: src/compiler/x86/float-sse2.lisp $")
    
    11
    +;;;
    
    12
    +;;; **********************************************************************
    
    13
    +;;;
    
    14
    +;;; This file contains floating point support for the x86.
    
    15
    +;;;
    
    16
    +
    
    17
    +(in-package :amd64)
    
    18
    +(intl:textdomain "cmucl-sse2")
    
    19
    +
    
    20
    +;;; Popping the FP stack.
    
    21
    +;;;
    
    22
    +;;; The default is to use a store and pop, fstp fr0.
    
    23
    +;;; For the AMD Athlon, using ffreep fr0 is faster.
    
    24
    +;;;
    
    25
    +(defun fp-pop ()
    
    26
    +  (if (backend-featurep :athlon)
    
    27
    +      (inst ffreep fr0-tn)
    
    28
    +      (inst fstp fr0-tn)))
    
    29
    +
    
    30
    +
    
    31
    +(macrolet ((ea-for-xf-desc (tn slot)
    
    32
    +	     `(make-ea
    
    33
    +	       :dword :base ,tn
    
    34
    +	       :disp (- (* ,slot vm:word-bytes) vm:other-pointer-type))))
    
    35
    +  (defun ea-for-sf-desc (tn)
    
    36
    +    (ea-for-xf-desc tn vm:single-float-value-slot))
    
    37
    +  (defun ea-for-df-desc (tn)
    
    38
    +    (ea-for-xf-desc tn vm:double-float-value-slot))
    
    39
    +  #+long-float
    
    40
    +  (defun ea-for-lf-desc (tn)
    
    41
    +    (ea-for-xf-desc tn vm:long-float-value-slot))
    
    42
    +  ;; Complex floats
    
    43
    +  (defun ea-for-csf-real-desc (tn)
    
    44
    +    (ea-for-xf-desc tn vm:complex-single-float-real-slot))
    
    45
    +  (defun ea-for-csf-imag-desc (tn)
    
    46
    +    (ea-for-xf-desc tn vm:complex-single-float-imag-slot))
    
    47
    +  (defun ea-for-cdf-real-desc (tn)
    
    48
    +    (ea-for-xf-desc tn vm:complex-double-float-real-slot))
    
    49
    +  (defun ea-for-cdf-imag-desc (tn)
    
    50
    +    (ea-for-xf-desc tn vm:complex-double-float-imag-slot))
    
    51
    +  #+long-float
    
    52
    +  (defun ea-for-clf-real-desc (tn)
    
    53
    +    (ea-for-xf-desc tn vm:complex-long-float-real-slot))
    
    54
    +  #+long-float
    
    55
    +  (defun ea-for-clf-imag-desc (tn)
    
    56
    +    (ea-for-xf-desc tn vm:complex-long-float-imag-slot))
    
    57
    +  #+double-double
    
    58
    +  (defun ea-for-cddf-real-hi-desc (tn)
    
    59
    +    (ea-for-xf-desc tn vm:complex-double-double-float-real-hi-slot))
    
    60
    +  #+double-double
    
    61
    +  (defun ea-for-cddf-real-lo-desc (tn)
    
    62
    +    (ea-for-xf-desc tn vm:complex-double-double-float-real-lo-slot))
    
    63
    +  #+double-double
    
    64
    +  (defun ea-for-cddf-imag-hi-desc (tn)
    
    65
    +    (ea-for-xf-desc tn vm:complex-double-double-float-imag-hi-slot))
    
    66
    +  #+double-double
    
    67
    +  (defun ea-for-cddf-imag-lo-desc (tn)
    
    68
    +    (ea-for-xf-desc tn vm:complex-double-double-float-imag-lo-slot))
    
    69
    +  )
    
    70
    +
    
    71
    +(macrolet ((ea-for-xf-stack (tn kind)
    
    72
    +	     `(make-ea
    
    73
    +	       :dword :base rbp-tn
    
    74
    +	       :disp (- (* (+ (tn-offset ,tn)
    
    75
    +			      (ecase ,kind (:single 1) (:double 2) (:long 3)))
    
    76
    +			 vm:word-bytes)))))
    
    77
    +  (defun ea-for-sf-stack (tn)
    
    78
    +    (ea-for-xf-stack tn :single))
    
    79
    +  (defun ea-for-df-stack (tn)
    
    80
    +    (ea-for-xf-stack tn :double))
    
    81
    +  #+long-float
    
    82
    +  (defun ea-for-lf-stack (tn)
    
    83
    +    (ea-for-xf-stack tn :long)))
    
    84
    +
    
    85
    +;;; Complex float stack EAs
    
    86
    +(macrolet ((ea-for-cxf-stack (tn kind slot &optional base)
    
    87
    +	     `(make-ea
    
    88
    +	       :qword :base ,base
    
    89
    +	       :disp (- (* (+ (tn-offset ,tn)
    
    90
    +			      (* (ecase ,kind
    
    91
    +				   (:single 1)
    
    92
    +				   (:double 2)
    
    93
    +				   (:long 3))
    
    94
    +				 (ecase ,slot
    
    95
    +				   ;; We want the real part to be at
    
    96
    +				   ;; the lower address!
    
    97
    +				   (:real 2)
    
    98
    +				   (:imag 1)
    
    99
    +				   (:real-hi 1)
    
    100
    +				   (:real-lo 2)
    
    101
    +				   (:imag-hi 3)
    
    102
    +				   (:imag-lo 4))))
    
    103
    +			 vm:word-bytes)))))
    
    104
    +  (defun ea-for-csf-real-stack (tn &optional (base rbp-tn))
    
    105
    +    (ea-for-cxf-stack tn :single :real base))
    
    106
    +  (defun ea-for-csf-imag-stack (tn &optional (base rbp-tn))
    
    107
    +    (ea-for-cxf-stack tn :single :imag base))
    
    108
    +  (defun ea-for-cdf-real-stack (tn &optional (base rbp-tn))
    
    109
    +    (ea-for-cxf-stack tn :double :real base))
    
    110
    +  (defun ea-for-cdf-imag-stack (tn &optional (base rbp-tn))
    
    111
    +    (ea-for-cxf-stack tn :double :imag base))
    
    112
    +  ;;
    
    113
    +  #+long-float
    
    114
    +  (defun ea-for-clf-real-stack (tn &optional (base ebp-tn))
    
    115
    +    (ea-for-cxf-stack tn :long :real base))
    
    116
    +  #+long-float
    
    117
    +  (defun ea-for-clf-imag-stack (tn &optional (base ebp-tn))
    
    118
    +    (ea-for-cxf-stack tn :long :imag base))
    
    119
    +
    
    120
    +  #+double-double
    
    121
    +  (defun ea-for-cddf-real-hi-stack (tn &optional (base rbp-tn))
    
    122
    +    (ea-for-cxf-stack tn :double :real-hi base))
    
    123
    +  #+double-double
    
    124
    +  (defun ea-for-cddf-real-lo-stack (tn &optional (base rbp-tn))
    
    125
    +    (ea-for-cxf-stack tn :double :real-lo base))
    
    126
    +  #+double-double
    
    127
    +  (defun ea-for-cddf-imag-hi-stack (tn &optional (base rbp-tn))
    
    128
    +    (ea-for-cxf-stack tn :double :imag-hi base))
    
    129
    +  #+double-double
    
    130
    +  (defun ea-for-cddf-imag-lo-stack (tn &optional (base rbp-tn))
    
    131
    +    (ea-for-cxf-stack tn :double :imag-lo base))
    
    132
    +  )
    
    133
    +
    
    134
    +;;; The x86 can't store a long-float to memory without popping the
    
    135
    +;;; stack and marking a register as empty, so it is necessary to
    
    136
    +;;; restore the register from memory.
    
    137
    +(defun store-long-float (ea)
    
    138
    +   (inst fstpl ea)
    
    139
    +   (inst fldl ea))
    
    140
    +
    
    141
    +
    
    142
    +;;;; Move functions:
    
    143
    +
    
    144
    +;;; x is source, y is destination
    
    145
    +(define-move-function (load-single 2) (vop x y)
    
    146
    +  ((single-stack) (single-reg))
    
    147
    +  (inst movss y (ea-for-sf-stack x)))
    
    148
    +
    
    149
    +(define-move-function (store-single 2) (vop x y)
    
    150
    +  ((single-reg) (single-stack))
    
    151
    +  (inst movss (ea-for-sf-stack y) x))
    
    152
    +
    
    153
    +(define-move-function (load-double 2) (vop x y)
    
    154
    +  ((double-stack) (double-reg))
    
    155
    +  (inst movsd y (ea-for-df-stack x)))
    
    156
    +
    
    157
    +(define-move-function (store-double 2) (vop x y)
    
    158
    +  ((double-reg) (double-stack))
    
    159
    +  (inst movsd (ea-for-df-stack y) x))
    
    160
    +
    
    161
    +#+long-float
    
    162
    +(define-move-function (load-long 2) (vop x y)
    
    163
    +  ((long-stack) (long-reg))
    
    164
    +  (with-empty-tn@fp-top(y)
    
    165
    +     (inst fldl (ea-for-lf-stack x))))
    
    166
    +
    
    167
    +#+long-float
    
    168
    +(define-move-function (store-long 2) (vop x y)
    
    169
    +  ((long-reg) (long-stack))
    
    170
    +  (cond ((zerop (tn-offset x))
    
    171
    +	 (store-long-float (ea-for-lf-stack y)))
    
    172
    +	(t
    
    173
    +	 (inst fxch x)
    
    174
    +	 (store-long-float (ea-for-lf-stack y))
    
    175
    +	 ;; This may not be necessary as ST0 is likely invalid now.
    
    176
    +	 (inst fxch x))))
    
    177
    +
    
    178
    +(define-move-function (load-fp-constant 2) (vop x y)
    
    179
    +  ((fp-constant) (single-reg double-reg))
    
    180
    +  (let ((value (c::constant-value (c::tn-leaf x))))
    
    181
    +    (cond ((and (zerop value)
    
    182
    +		(= (float-sign value) 1))
    
    183
    +	   (sc-case y
    
    184
    +	     (single-reg (inst xorps y y))
    
    185
    +	     (double-reg (inst xorpd y y))))
    
    186
    +	  (t
    
    187
    +	   (warn (intl:gettext "Ignoring bogus i387 Constant ~a") value)))))
    
    188
    +
    
    189
    +
    
    190
    +;;;; Complex float move functions
    
    191
    +
    
    192
    +(defun complex-single-reg-real-tn (x)
    
    193
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg *backend*)
    
    194
    +		  :offset (tn-offset x)))
    
    195
    +(defun complex-single-reg-imag-tn (x)
    
    196
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg *backend*)
    
    197
    +		  :offset (1+ (tn-offset x))))
    
    198
    +
    
    199
    +(defun complex-double-reg-real-tn (x)
    
    200
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
    
    201
    +		  :offset (tn-offset x)))
    
    202
    +(defun complex-double-reg-imag-tn (x)
    
    203
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
    
    204
    +		  :offset (1+ (tn-offset x))))
    
    205
    +
    
    206
    +#+long-float
    
    207
    +(defun complex-long-reg-real-tn (x)
    
    208
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg *backend*)
    
    209
    +		  :offset (tn-offset x)))
    
    210
    +#+long-float
    
    211
    +(defun complex-long-reg-imag-tn (x)
    
    212
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg *backend*)
    
    213
    +		  :offset (1+ (tn-offset x))))
    
    214
    +
    
    215
    +#+double-double
    
    216
    +(progn
    
    217
    +(defun complex-double-double-reg-real-hi-tn (x)
    
    218
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
    
    219
    +		  :offset (tn-offset x)))
    
    220
    +(defun complex-double-double-reg-real-lo-tn (x)
    
    221
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
    
    222
    +		  :offset (+ 1 (tn-offset x))))
    
    223
    +(defun complex-double-double-reg-imag-hi-tn (x)
    
    224
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
    
    225
    +		  :offset (+ 2 (tn-offset x))))
    
    226
    +(defun complex-double-double-reg-imag-lo-tn (x)
    
    227
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
    
    228
    +		  :offset (+ 3 (tn-offset x))))
    
    229
    +)
    
    230
    +;;; x is source, y is destination
    
    231
    +(define-move-function (load-complex-single 2) (vop x y)
    
    232
    +  ((complex-single-stack) (complex-single-reg))
    
    233
    +  (inst movlps y (ea-for-csf-real-stack x)))
    
    234
    +
    
    235
    +(define-move-function (store-complex-single 2) (vop x y)
    
    236
    +  ((complex-single-reg) (complex-single-stack))
    
    237
    +  (inst movlps (ea-for-csf-real-stack y) x))
    
    238
    +
    
    239
    +(define-move-function (load-complex-double 2) (vop x y)
    
    240
    +  ((complex-double-stack) (complex-double-reg))
    
    241
    +  (inst movupd y (ea-for-cdf-real-stack x)))
    
    242
    +
    
    243
    +(define-move-function (store-complex-double 2) (vop x y)
    
    244
    +  ((complex-double-reg) (complex-double-stack))
    
    245
    +  (inst movupd (ea-for-cdf-real-stack y) x))
    
    246
    +
    
    247
    +#+long-float
    
    248
    +(define-move-function (load-complex-long 2) (vop x y)
    
    249
    +  ((complex-long-stack) (complex-long-reg))
    
    250
    +  (let ((real-tn (complex-long-reg-real-tn y)))
    
    251
    +    (with-empty-tn@fp-top(real-tn)
    
    252
    +      (inst fldl (ea-for-clf-real-stack x))))
    
    253
    +  (let ((imag-tn (complex-long-reg-imag-tn y)))
    
    254
    +    (with-empty-tn@fp-top(imag-tn)
    
    255
    +      (inst fldl (ea-for-clf-imag-stack x)))))
    
    256
    +
    
    257
    +#+long-float
    
    258
    +(define-move-function (store-complex-long 2) (vop x y)
    
    259
    +  ((complex-long-reg) (complex-long-stack))
    
    260
    +  (let ((real-tn (complex-long-reg-real-tn x)))
    
    261
    +    (cond ((zerop (tn-offset real-tn))
    
    262
    +	   (store-long-float (ea-for-clf-real-stack y)))
    
    263
    +	  (t
    
    264
    +	   (inst fxch real-tn)
    
    265
    +	   (store-long-float (ea-for-clf-real-stack y))
    
    266
    +	   (inst fxch real-tn))))
    
    267
    +  (let ((imag-tn (complex-long-reg-imag-tn x)))
    
    268
    +    (inst fxch imag-tn)
    
    269
    +    (store-long-float (ea-for-clf-imag-stack y))
    
    270
    +    (inst fxch imag-tn)))
    
    271
    +
    
    272
    +#+double-double
    
    273
    +(progn
    
    274
    +(define-move-function (load-complex-double-double 4) (vop x y)
    
    275
    +  ((complex-double-double-stack) (complex-double-double-reg))
    
    276
    +  (let ((real-tn (complex-double-double-reg-real-hi-tn y)))
    
    277
    +    (inst movsd real-tn (ea-for-cddf-real-hi-stack x)))
    
    278
    +  (let ((real-tn (complex-double-double-reg-real-lo-tn y)))
    
    279
    +    (inst movsd real-tn (ea-for-cddf-real-lo-stack x)))
    
    280
    +  (let ((imag-tn (complex-double-double-reg-imag-hi-tn y)))
    
    281
    +    (inst movsd imag-tn (ea-for-cddf-imag-hi-stack x)))
    
    282
    +  (let ((imag-tn (complex-double-double-reg-imag-lo-tn y)))
    
    283
    +    (inst movsd imag-tn (ea-for-cddf-imag-lo-stack x))))
    
    284
    +
    
    285
    +(define-move-function (store-complex-double-double 4) (vop x y)
    
    286
    +  ((complex-double-double-reg) (complex-double-double-stack))
    
    287
    +  ;; FIXME: These may not be right!!!!
    
    288
    +  (let ((real-tn (complex-double-double-reg-real-hi-tn x)))
    
    289
    +    (inst movsd (ea-for-cddf-real-hi-stack y) real-tn))
    
    290
    +  (let ((real-tn (complex-double-double-reg-real-lo-tn x)))
    
    291
    +    (inst movsd (ea-for-cddf-real-lo-stack y) real-tn))
    
    292
    +  (let ((imag-tn (complex-double-double-reg-imag-hi-tn x)))
    
    293
    +    (inst movsd (ea-for-cddf-imag-hi-stack y) imag-tn))
    
    294
    +  (let ((imag-tn (complex-double-double-reg-imag-lo-tn x)))
    
    295
    +    (inst movsd (ea-for-cddf-imag-lo-stack y) imag-tn)))
    
    296
    +
    
    297
    +)
    
    298
    +
    
    299
    +;;;; Move VOPs:
    
    300
    +
    
    301
    +;;;
    
    302
    +;;; Float register to register moves.
    
    303
    +;;;
    
    304
    +#+nil
    
    305
    +(define-vop (float-move)
    
    306
    +  (:args (x))
    
    307
    +  (:results (y))
    
    308
    +  (:note _N"float move")
    
    309
    +  (:generator 0
    
    310
    +     (unless (location= x y)
    
    311
    +       (inst movq y x))))
    
    312
    +
    
    313
    +(define-vop (float-move/single)
    
    314
    +  (:args (x))
    
    315
    +  (:results (y))
    
    316
    +  (:note _N"float move")
    
    317
    +  (:temporary (:sc single-stack) temp)
    
    318
    +  (:generator 0
    
    319
    +    (unless (location= x y)
    
    320
    +      (let ((x-offset (tn-offset x))
    
    321
    +	    (y-offset (tn-offset y)))
    
    322
    +	(cond ((and (zerop x-offset)
    
    323
    +		    (>= y-offset 8))
    
    324
    +	       ;; Move fr0 to xmm
    
    325
    +	       (inst fst (ea-for-sf-stack temp))
    
    326
    +	       (inst movss y (ea-for-sf-stack temp)))
    
    327
    +	      ((and (>= x-offset 8)
    
    328
    +		    (>= y-offset 8))
    
    329
    +	       (inst movq y x))
    
    330
    +	      (t
    
    331
    +	       (error "Don't know how to move ~S to ~S" x y)))))))
    
    332
    +
    
    333
    +(define-vop (float-move/double)
    
    334
    +  (:args (x))
    
    335
    +  (:results (y))
    
    336
    +  (:note _N"float move")
    
    337
    +  (:temporary (:sc double-stack) temp)
    
    338
    +  (:generator 0
    
    339
    +    (unless (location= x y)
    
    340
    +      (let ((x-offset (tn-offset x))
    
    341
    +	    (y-offset (tn-offset y)))
    
    342
    +	(cond ((and (zerop x-offset)
    
    343
    +		    (>= y-offset 8))
    
    344
    +	       ;; Move fr0 to xmm
    
    345
    +	       (inst fstd (ea-for-df-stack temp))
    
    346
    +	       (inst movsd y (ea-for-df-stack temp)))
    
    347
    +	      ((and (>= x-offset 8)
    
    348
    +		    (>= y-offset 8))
    
    349
    +	       (inst movq y x))
    
    350
    +	      (t
    
    351
    +	       (error "Don't know how to move ~S to ~S" x y)))))))
    
    352
    +
    
    353
    +(define-vop (single-move float-move/single)
    
    354
    +  (:args (x :scs (single-reg) :target y :load-if (not (location= x y))))
    
    355
    +  (:results (y :scs (single-reg) :load-if (not (location= x y)))))
    
    356
    +
    
    357
    +(define-move-vop single-move :move (single-reg) (single-reg))
    
    358
    +
    
    359
    +(define-vop (double-move float-move/double)
    
    360
    +  (:args (x :scs (double-reg) :target y :load-if (not (location= x y))))
    
    361
    +  (:results (y :scs (double-reg) :load-if (not (location= x y)))))
    
    362
    +(define-move-vop double-move :move (double-reg) (double-reg))
    
    363
    +
    
    364
    +#+long-float
    
    365
    +(define-vop (long-move float-move)
    
    366
    +  (:args (x :scs (long-reg) :target y :load-if (not (location= x y))))
    
    367
    +  (:results (y :scs (long-reg) :load-if (not (location= x y)))))
    
    368
    +#+long-float
    
    369
    +(define-move-vop long-move :move (long-reg) (long-reg))
    
    370
    +
    
    371
    +;;;
    
    372
    +;;; Complex float register to register moves.
    
    373
    +;;;
    
    374
    +(define-vop (complex-single-move)
    
    375
    +  (:args (x :scs (complex-single-reg) :target y
    
    376
    +	    :load-if (not (location= x y))))
    
    377
    +  (:results (y :scs (complex-single-reg) :load-if (not (location= x y))))
    
    378
    +  (:generator 0
    
    379
    +    (unless (location= x y)
    
    380
    +      (inst movaps y x))))
    
    381
    +
    
    382
    +(define-move-vop complex-single-move :move
    
    383
    +  (complex-single-reg) (complex-single-reg))
    
    384
    +
    
    385
    +(define-vop (complex-double-move)
    
    386
    +  (:args (x :scs (complex-double-reg)
    
    387
    +	    :target y :load-if (not (location= x y))))
    
    388
    +  (:results (y :scs (complex-double-reg) :load-if (not (location= x y))))
    
    389
    +  (:generator 0
    
    390
    +    (unless (location= x y)
    
    391
    +      (inst movapd y x))))
    
    392
    +
    
    393
    +(define-move-vop complex-double-move :move
    
    394
    +  (complex-double-reg) (complex-double-reg))
    
    395
    +
    
    396
    +    
    
    397
    +#+long-float
    
    398
    +(define-vop (complex-long-move complex-float-move)
    
    399
    +  (:args (x :scs (complex-long-reg)
    
    400
    +	    :target y :load-if (not (location= x y))))
    
    401
    +  (:results (y :scs (complex-long-reg) :load-if (not (location= x y)))))
    
    402
    +#+long-float
    
    403
    +(define-move-vop complex-long-move :move
    
    404
    +  (complex-long-reg) (complex-long-reg))
    
    405
    +
    
    406
    +
    
    407
    +;;;
    
    408
    +;;; Move from float to a descriptor reg. allocating a new float
    
    409
    +;;; object in the process.
    
    410
    +;;;
    
    411
    +(define-vop (move-from-single)
    
    412
    +  (:args (x :scs (single-reg) :to :save))
    
    413
    +  (:results (y :scs (descriptor-reg)))
    
    414
    +  (:node-var node)
    
    415
    +  (:note _N"float to pointer coercion")
    
    416
    +  (:generator 13
    
    417
    +     (with-fixed-allocation (y vm:single-float-type vm:single-float-size node)
    
    418
    +       (inst movss (ea-for-sf-desc y) x))))
    
    419
    +(define-move-vop move-from-single :move
    
    420
    +  (single-reg) (descriptor-reg))
    
    421
    +
    
    422
    +(define-vop (move-from-double)
    
    423
    +  (:args (x :scs (double-reg) :to :save))
    
    424
    +  (:results (y :scs (descriptor-reg)))
    
    425
    +  (:node-var node)
    
    426
    +  (:note _N"float to pointer coercion")
    
    427
    +  (:generator 13
    
    428
    +     (with-fixed-allocation (y vm:double-float-type vm:double-float-size node)
    
    429
    +       (inst movsd (ea-for-df-desc y) x))))
    
    430
    +(define-move-vop move-from-double :move
    
    431
    +  (double-reg) (descriptor-reg))
    
    432
    +
    
    433
    +#+long-float
    
    434
    +(define-vop (move-from-long)
    
    435
    +  (:args (x :scs (long-reg) :to :save))
    
    436
    +  (:results (y :scs (descriptor-reg)))
    
    437
    +  (:node-var node)
    
    438
    +  (:note _N"float to pointer coercion")
    
    439
    +  (:generator 13
    
    440
    +     (with-fixed-allocation (y vm:long-float-type vm:long-float-size node)
    
    441
    +       (with-tn@fp-top(x)
    
    442
    +	 (store-long-float (ea-for-lf-desc y))))))
    
    443
    +#+long-float
    
    444
    +(define-move-vop move-from-long :move
    
    445
    +  (long-reg) (descriptor-reg))
    
    446
    +
    
    447
    +(define-vop (move-from-fp-constant)
    
    448
    +  (:args (x :scs (fp-constant)))
    
    449
    +  (:results (y :scs (descriptor-reg)))
    
    450
    +  (:generator 2
    
    451
    +     (ecase (c::constant-value (c::tn-leaf x))
    
    452
    +       (0f0 (load-symbol-value y *fp-constant-0s0*))
    
    453
    +       #+nil
    
    454
    +       (1f0 (load-symbol-value y *fp-constant-1s0*))
    
    455
    +       (0d0 (load-symbol-value y *fp-constant-0d0*))
    
    456
    +       #+nil
    
    457
    +       (1d0 (load-symbol-value y *fp-constant-1d0*)))))
    
    458
    +(define-move-vop move-from-fp-constant :move
    
    459
    +  (fp-constant) (descriptor-reg))
    
    460
    +
    
    461
    +;;;
    
    462
    +;;; Move from a descriptor to a float register
    
    463
    +;;;
    
    464
    +(define-vop (move-to-single)
    
    465
    +  (:args (x :scs (descriptor-reg)))
    
    466
    +  (:results (y :scs (single-reg)))
    
    467
    +  (:note _N"pointer to float coercion")
    
    468
    +  (:generator 2
    
    469
    +    (inst movss y (ea-for-sf-desc x))))
    
    470
    +(define-move-vop move-to-single :move (descriptor-reg) (single-reg))
    
    471
    +
    
    472
    +(define-vop (move-to-double)
    
    473
    +  (:args (x :scs (descriptor-reg)))
    
    474
    +  (:results (y :scs (double-reg)))
    
    475
    +  (:note _N"pointer to float coercion")
    
    476
    +  (:generator 2
    
    477
    +    (inst movsd y (ea-for-df-desc x))))
    
    478
    +(define-move-vop move-to-double :move (descriptor-reg) (double-reg))
    
    479
    +
    
    480
    +#+long-float
    
    481
    +(define-vop (move-to-long)
    
    482
    +  (:args (x :scs (descriptor-reg)))
    
    483
    +  (:results (y :scs (long-reg)))
    
    484
    +  (:note _N"pointer to float coercion")
    
    485
    +  (:generator 2
    
    486
    +     (with-empty-tn@fp-top(y)
    
    487
    +       (inst fldl (ea-for-lf-desc x)))))
    
    488
    +#+long-float
    
    489
    +(define-move-vop move-to-long :move (descriptor-reg) (long-reg))
    
    490
    +
    
    491
    +
    
    492
    +;;;
    
    493
    +;;; Move from complex float to a descriptor reg. allocating a new
    
    494
    +;;; complex float object in the process.
    
    495
    +;;;
    
    496
    +(define-vop (move-from-complex-single)
    
    497
    +  (:args (x :scs (complex-single-reg) :to :save))
    
    498
    +  (:results (y :scs (descriptor-reg)))
    
    499
    +  (:node-var node)
    
    500
    +  (:note _N"complex float to pointer coercion")
    
    501
    +  (:generator 13
    
    502
    +     (with-fixed-allocation (y vm:complex-single-float-type
    
    503
    +			       vm:complex-single-float-size node)
    
    504
    +       (inst movlps (ea-for-csf-real-desc y) x))))
    
    505
    +(define-move-vop move-from-complex-single :move
    
    506
    +  (complex-single-reg) (descriptor-reg))
    
    507
    +
    
    508
    +(define-vop (move-from-complex-double)
    
    509
    +  (:args (x :scs (complex-double-reg) :to :save))
    
    510
    +  (:results (y :scs (descriptor-reg)))
    
    511
    +  (:node-var node)
    
    512
    +  (:note _N"complex float to pointer coercion")
    
    513
    +  (:generator 13
    
    514
    +     (with-fixed-allocation (y vm:complex-double-float-type
    
    515
    +			       vm:complex-double-float-size node)
    
    516
    +       (inst movupd (ea-for-cdf-real-desc y) x))))
    
    517
    +
    
    518
    +(define-move-vop move-from-complex-double :move
    
    519
    +  (complex-double-reg) (descriptor-reg))
    
    520
    +
    
    521
    +#+long-float
    
    522
    +(define-vop (move-from-complex-long)
    
    523
    +  (:args (x :scs (complex-long-reg) :to :save))
    
    524
    +  (:results (y :scs (descriptor-reg)))
    
    525
    +  (:node-var node)
    
    526
    +  (:note _N"complex float to pointer coercion")
    
    527
    +  (:generator 13
    
    528
    +     (with-fixed-allocation (y vm:complex-long-float-type
    
    529
    +			       vm:complex-long-float-size node)
    
    530
    +       (let ((real-tn (complex-long-reg-real-tn x)))
    
    531
    +	 (with-tn@fp-top(real-tn)
    
    532
    +	   (store-long-float (ea-for-clf-real-desc y))))
    
    533
    +       (let ((imag-tn (complex-long-reg-imag-tn x)))
    
    534
    +	 (with-tn@fp-top(imag-tn)
    
    535
    +	   (store-long-float (ea-for-clf-imag-desc y)))))))
    
    536
    +#+long-float
    
    537
    +(define-move-vop move-from-complex-long :move
    
    538
    +  (complex-long-reg) (descriptor-reg))
    
    539
    +
    
    540
    +#+double-double
    
    541
    +(define-vop (move-from-complex-double-double)
    
    542
    +  (:args (x :scs (complex-double-double-reg) :to :save))
    
    543
    +  (:results (y :scs (descriptor-reg)))
    
    544
    +  (:node-var node)
    
    545
    +  (:note _N"complex double-double float to pointer coercion")
    
    546
    +  (:generator 13
    
    547
    +     (with-fixed-allocation (y vm::complex-double-double-float-type
    
    548
    +			       vm::complex-double-double-float-size node)
    
    549
    +       (let ((real-tn (complex-double-double-reg-real-hi-tn x)))
    
    550
    +	 (inst movsd (ea-for-cddf-real-hi-desc y) real-tn))
    
    551
    +       (let ((real-tn (complex-double-double-reg-real-lo-tn x)))
    
    552
    +	 (inst movsd (ea-for-cddf-real-lo-desc y) real-tn))
    
    553
    +       (let ((imag-tn (complex-double-double-reg-imag-hi-tn x)))
    
    554
    +	 (inst movsd (ea-for-cddf-imag-hi-desc y) imag-tn))
    
    555
    +       (let ((imag-tn (complex-double-double-reg-imag-lo-tn x)))
    
    556
    +	 (inst movsd (ea-for-cddf-imag-lo-desc y) imag-tn)))))
    
    557
    +;;;
    
    558
    +#+double-double
    
    559
    +(define-move-vop move-from-complex-double-double :move
    
    560
    +  (complex-double-double-reg) (descriptor-reg))
    
    561
    +;;;
    
    562
    +;;; Move from a descriptor to a complex float register
    
    563
    +;;;
    
    564
    +(define-vop (move-to-complex-single)
    
    565
    +  (:args (x :scs (descriptor-reg)))
    
    566
    +  (:results (y :scs (complex-single-reg)))
    
    567
    +  (:note _N"pointer to complex float coercion")
    
    568
    +  (:generator 2
    
    569
    +    (inst movlps y (ea-for-csf-real-desc x))))
    
    570
    +
    
    571
    +(define-move-vop move-to-complex-single :move
    
    572
    +  (descriptor-reg) (complex-single-reg))
    
    573
    +
    
    574
    +(define-vop (move-to-complex-double)
    
    575
    +  (:args (x :scs (descriptor-reg)))
    
    576
    +  (:results (y :scs (complex-double-reg)))
    
    577
    +  (:note _N"pointer to complex float coercion")
    
    578
    +  (:generator 2
    
    579
    +    (inst movupd y (ea-for-cdf-real-desc x))))
    
    580
    +
    
    581
    +(define-move-vop move-to-complex-double :move
    
    582
    +  (descriptor-reg) (complex-double-reg))
    
    583
    +
    
    584
    +
    
    585
    +;;;
    
    586
    +;;; The move argument vops.
    
    587
    +;;;
    
    588
    +;;; Note these are also used to stuff fp numbers onto the c-call stack
    
    589
    +;;; so the order is different than the lisp-stack.
    
    590
    +
    
    591
    +;;; The general move-argument vop
    
    592
    +(macrolet ((frob (name sc stack-sc format)
    
    593
    +	     `(progn
    
    594
    +		(define-vop (,name)
    
    595
    +		  (:args (x :scs (,sc) :target y)
    
    596
    +			 (fp :scs (any-reg)
    
    597
    +			     :load-if (not (sc-is y ,sc))))
    
    598
    +		  (:results (y))
    
    599
    +		  (:note _N"float argument move")
    
    600
    +		  (:generator ,(case format (:single 2) (:double 3) (:long 4))
    
    601
    +		    (sc-case y
    
    602
    +		      (,sc
    
    603
    +		       (unless (location= x y)
    
    604
    +			 (inst movq y x)))
    
    605
    +		      (,stack-sc
    
    606
    +		       (if (= (tn-offset fp) esp-offset)
    
    607
    +			   (let* ((offset (* (tn-offset y) word-bytes))
    
    608
    +				  (ea (make-ea :dword :base fp :disp offset)))
    
    609
    +			     ,@(ecase format
    
    610
    +				      (:single '((inst movss ea x)))
    
    611
    +				      (:double '((inst movsd ea x)))))
    
    612
    +			   (let ((ea (make-ea
    
    613
    +				      :dword :base fp
    
    614
    +				      :disp (- (* (+ (tn-offset y)
    
    615
    +						     ,(case format
    
    616
    +							    (:single 1)
    
    617
    +							    (:double 2)
    
    618
    +							    (:long 3)))
    
    619
    +						  vm:word-bytes)))))
    
    620
    +			     ,@(ecase format 
    
    621
    +				      (:single '((inst movss ea x)))
    
    622
    +				      (:double '((inst movsd ea x))))))))))
    
    623
    +		(define-move-vop ,name :move-argument
    
    624
    +		  (,sc descriptor-reg) (,sc)))))
    
    625
    +  (frob move-single-float-argument single-reg single-stack :single)
    
    626
    +  (frob move-double-float-argument double-reg double-stack :double))
    
    627
    +
    
    628
    +;;;; Complex float move-argument vop
    
    629
    +(define-vop (move-complex-single-float-argument)
    
    630
    +  (:args (x :scs (complex-single-reg) :target y)
    
    631
    +	 (fp :scs (any-reg)
    
    632
    +	     :load-if (not (sc-is y complex-single-reg))))
    
    633
    +  (:results (y))
    
    634
    +  (:note _N"complex float argument move")
    
    635
    +  (:generator 3
    
    636
    +    (sc-case y
    
    637
    +      (complex-single-reg
    
    638
    +       (unless (location= x y)
    
    639
    +	 (inst movaps y x)))
    
    640
    +      (complex-single-stack
    
    641
    +       (inst movlps (ea-for-csf-real-stack y fp) x)))))
    
    642
    +
    
    643
    +(define-move-vop move-complex-single-float-argument :move-argument
    
    644
    +  (complex-single-reg descriptor-reg) (complex-single-reg))
    
    645
    +
    
    646
    +(define-vop (move-complex-double-float-argument)
    
    647
    +  (:args (x :scs (complex-double-reg) :target y)
    
    648
    +	 (fp :scs (any-reg)
    
    649
    +	     :load-if (not (sc-is y complex-double-reg))))
    
    650
    +  (:results (y))
    
    651
    +  (:note _N"complex float argument move")
    
    652
    +  (:generator 3
    
    653
    +    (sc-case y
    
    654
    +      (complex-double-reg
    
    655
    +       (unless (location= x y)
    
    656
    +	 (inst movapd y x)))
    
    657
    +      (complex-double-stack
    
    658
    +       (inst movupd (ea-for-cdf-real-stack y fp) x)))))
    
    659
    +
    
    660
    +(define-move-vop move-complex-double-float-argument :move-argument
    
    661
    +  (complex-double-reg descriptor-reg) (complex-double-reg))
    
    662
    +
    
    663
    +#+double-double
    
    664
    +(define-vop (move-complex-double-double-float-argument)
    
    665
    +  (:args (x :scs (complex-double-double-reg) :target y)
    
    666
    +	 (fp :scs (any-reg) :load-if (not (sc-is y complex-double-double-reg))))
    
    667
    +  (:results (y))
    
    668
    +  (:note _N"complex double-double-float argument move")
    
    669
    +  (:generator 2
    
    670
    +    (sc-case y
    
    671
    +      (complex-double-double-reg
    
    672
    +       (unless (location= x y)
    
    673
    +	 (let ((x-real (complex-double-double-reg-real-hi-tn x))
    
    674
    +	       (y-real (complex-double-double-reg-real-hi-tn y)))
    
    675
    +	   (inst movsd y-real x-real))
    
    676
    +	 (let ((x-real (complex-double-double-reg-real-lo-tn x))
    
    677
    +	       (y-real (complex-double-double-reg-real-lo-tn y)))
    
    678
    +	   (inst movsd y-real x-real))
    
    679
    +	 (let ((x-imag (complex-double-double-reg-imag-hi-tn x))
    
    680
    +	       (y-imag (complex-double-double-reg-imag-hi-tn y)))
    
    681
    +	   (inst movsd y-imag x-imag))
    
    682
    +	 (let ((x-imag (complex-double-double-reg-imag-lo-tn x))
    
    683
    +	       (y-imag (complex-double-double-reg-imag-lo-tn y)))
    
    684
    +	   (inst movsd y-imag x-imag))))
    
    685
    +      (complex-double-double-stack
    
    686
    +       (let ((real-tn (complex-double-double-reg-real-hi-tn x)))
    
    687
    +	 (inst movsd (ea-for-cddf-real-hi-stack y fp) real-tn))
    
    688
    +       (let ((real-tn (complex-double-double-reg-real-lo-tn x)))
    
    689
    +	 (inst movsd (ea-for-cddf-real-lo-stack y fp) real-tn))
    
    690
    +       (let ((imag-tn (complex-double-double-reg-imag-hi-tn x)))
    
    691
    +	 (inst movsd (ea-for-cddf-imag-hi-stack y fp) imag-tn))
    
    692
    +       (let ((imag-tn (complex-double-double-reg-imag-lo-tn x)))
    
    693
    +	 (inst movsd (ea-for-cddf-imag-lo-stack y fp) imag-tn))))
    
    694
    +    ))
    
    695
    +
    
    696
    +#+double-double
    
    697
    +(define-move-vop move-complex-double-double-float-argument :move-argument
    
    698
    +  (complex-double-double-reg descriptor-reg) (complex-double-double-reg))
    
    699
    +
    
    700
    +(define-move-vop move-argument :move-argument
    
    701
    +  (single-reg double-reg #+long-float long-reg
    
    702
    +   #+double-double double-double-reg
    
    703
    +   complex-single-reg complex-double-reg #+long-float complex-long-reg
    
    704
    +   #+double-double complex-double-double-reg)
    
    705
    +  (descriptor-reg))
    
    706
    +
    
    707
    +
    
    708
    +;;;; Arithmetic VOPs:
    
    709
    +
    
    710
    +
    
    711
    +;;; dtc: The floating point arithmetic vops.
    
    712
    +;;; 
    
    713
    +;;; Note: Although these can accept x and y on the stack or pointed to
    
    714
    +;;; from a descriptor register, they will work with register loading
    
    715
    +;;; without these.  Same deal with the result - it need only be a
    
    716
    +;;; register.  When load-tns are needed they will probably be in ST0
    
    717
    +;;; and the code below should be able to correctly handle all cases.
    
    718
    +;;;
    
    719
    +;;; However it seems to produce better code if all arg. and result
    
    720
    +;;; options are used; on the P86 there is no extra cost in using a
    
    721
    +;;; memory operand to the FP instructions - not so on the PPro.
    
    722
    +;;;
    
    723
    +;;; It may also be useful to handle constant args?
    
    724
    +;;;
    
    725
    +;;; 22-Jul-97: descriptor args lose in some simple cases when
    
    726
    +;;; a function result computed in a loop. Then Python insists
    
    727
    +;;; on consing the intermediate values! For example
    
    728
    +#|
    
    729
    +(defun test(a n)
    
    730
    +  (declare (type (simple-array double-float (*)) a)
    
    731
    +	   (fixnum n))
    
    732
    +  (let ((sum 0d0))
    
    733
    +    (declare (type double-float sum))
    
    734
    +  (dotimes (i n)
    
    735
    +    (incf sum (* (aref a i)(aref a i))))
    
    736
    +    sum))
    
    737
    +|#
    
    738
    +;;; So, disabling descriptor args until this can be fixed elsewhere.
    
    739
    +;;;
    
    740
    +
    
    741
    +(define-vop (float-op)
    
    742
    +  (:args (x) (y))
    
    743
    +  (:results (r))
    
    744
    +  (:policy :fast-safe)
    
    745
    +  (:note _N"inline float arithmetic")
    
    746
    +  (:vop-var vop)
    
    747
    +  (:save-p :compute-only))
    
    748
    +
    
    749
    +(macrolet ((frob (name sc ptype)
    
    750
    +             `(define-vop (,name float-op)
    
    751
    +                (:args (x :scs (,sc) :target r)
    
    752
    +                       (y :scs (,sc)))
    
    753
    +                (:results (r :scs (,sc)))
    
    754
    +                (:arg-types ,ptype ,ptype)
    
    755
    +                (:result-types ,ptype))))
    
    756
    +  (frob single-float-op single-reg single-float)
    
    757
    +  (frob double-float-op double-reg double-float))
    
    758
    +
    
    759
    +(macrolet ((generate (movinst opinst commutative arg-type)
    
    760
    +	     (multiple-value-bind (rtype stack-sc ea ea-stack)
    
    761
    +		 (if (eq arg-type 'single)
    
    762
    +		     (values 'single-reg 'single-stack 'ea-for-sf-desc 'ea-for-sf-stack)
    
    763
    +		     (values 'double-reg 'double-stack 'ea-for-df-desc 'ea-for-df-stack))
    
    764
    +	       `(progn
    
    765
    +		  (cond
    
    766
    +		    ((location= x r)
    
    767
    +		     ;; x and r are the same.  We can just operate on x,
    
    768
    +		     ;; and we're done.
    
    769
    +		     (sc-case y
    
    770
    +		       (,rtype
    
    771
    +			(inst ,opinst x y))
    
    772
    +		       (descriptor-reg
    
    773
    +			(inst ,opinst x (,ea y)))
    
    774
    +		       (,stack-sc
    
    775
    +			(inst ,opinst x (,ea-stack y)))))
    
    776
    +		    ((and ,commutative (location= y r))
    
    777
    +		     ;; y = r and the operation is commutative, so just
    
    778
    +		     ;; do the operation with r and x.
    
    779
    +		     (inst ,opinst y x))
    
    780
    +		    ((not (location= r y))
    
    781
    +		     ;; x, y, and r are three different regs.  So just
    
    782
    +		     ;; move r to x and do the operation on r.
    
    783
    +		     (inst ,movinst r x)
    
    784
    +		     (sc-case y
    
    785
    +		       (,rtype
    
    786
    +			(inst ,opinst r y))
    
    787
    +		       (descriptor-reg
    
    788
    +			(inst ,opinst r (,ea y)))
    
    789
    +		       (,stack-sc
    
    790
    +			(inst, opinst r (,ea-stack y)))))
    
    791
    +		    (t
    
    792
    +		     ;; The hard case where the operation is not
    
    793
    +		     ;; commutative, but y might be r.  Don't want to
    
    794
    +		     ;; destroy y in this case, so use a temp so we
    
    795
    +		     ;; don't accidentally overwrite y.
    
    796
    +		     (inst ,movinst tmp x)
    
    797
    +		     (sc-case y
    
    798
    +		       (,rtype
    
    799
    +			(inst ,opinst tmp y))
    
    800
    +		       (descriptor-reg
    
    801
    +			(inst ,opinst tmp (,ea y)))
    
    802
    +		       (,stack-sc
    
    803
    +			(inst, opinst tmp (,ea-stack y))))
    
    804
    +		     (inst ,movinst r tmp))))))
    
    805
    +           (frob (op sinst sname scost dinst dname dcost commutative)
    
    806
    +             `(progn
    
    807
    +                (define-vop (,sname single-float-op)
    
    808
    +		  (:args (x :scs (single-reg) :target r)
    
    809
    +			 (y :scs (single-reg descriptor-reg)
    
    810
    +			    :load-if (not (sc-is y single-stack))))
    
    811
    +		  (:translate ,op)
    
    812
    +                  (:temporary (:sc single-reg) tmp)
    
    813
    +                  (:generator ,scost
    
    814
    +                    (generate movss ,sinst ,commutative single)))
    
    815
    +                (define-vop (,dname double-float-op)
    
    816
    +		  (:args (x :scs (double-reg) :target r)
    
    817
    +			 (y :scs (double-reg descriptor-reg)
    
    818
    +			    :load-if (not (sc-is y double-stack))))
    
    819
    +                  (:translate ,op)
    
    820
    +                  (:temporary (:sc double-reg) tmp)
    
    821
    +                  (:generator ,dcost
    
    822
    +                    (generate movsd ,dinst ,commutative double))))))
    
    823
    +  (frob + addss +/single-float 2 addsd +/double-float 2 t)
    
    824
    +  (frob - subss -/single-float 2 subsd -/double-float 2 nil)
    
    825
    +  (frob * mulss */single-float 4 mulsd */double-float 5 t)
    
    826
    +  (frob / divss //single-float 12 divsd //double-float 19 nil))
    
    827
    +
    
    828
    +(define-vop (fsqrt)
    
    829
    +  (:args (x :scs (double-reg)))
    
    830
    +  (:results (y :scs (double-reg)))
    
    831
    +  (:translate %sqrt)
    
    832
    +  (:policy :fast-safe)
    
    833
    +  (:arg-types double-float)
    
    834
    +  (:result-types double-float)
    
    835
    +  (:note _N"inline float arithmetic")
    
    836
    +  (:vop-var vop)
    
    837
    +  (:save-p :compute-only)
    
    838
    +  (:generator 1
    
    839
    +     (note-this-location vop :internal-error)
    
    840
    +     (inst sqrtsd y x)))
    
    841
    +
    
    842
    +(macrolet ((frob ((name translate mov sc type) &body body)
    
    843
    +             `(define-vop (,name)
    
    844
    +	        (:args (x :scs (,sc)))
    
    845
    +                (:results (y :scs (,sc)))
    
    846
    +                (:translate ,translate)
    
    847
    +                (:policy :fast-safe)
    
    848
    +                (:arg-types ,type)
    
    849
    +                (:result-types ,type)
    
    850
    +                (:temporary (:sc ,sc) tmp)
    
    851
    +                (:note _N"inline float arithmetic")
    
    852
    +                (:vop-var vop)
    
    853
    +                (:save-p :compute-only)
    
    854
    +                (:generator 1
    
    855
    +		  (note-this-location vop :internal-error)
    
    856
    +		  (inst pcmpeqd tmp tmp)		; all 1's
    
    857
    +		  ;; we should be able to do this better.  what we
    
    858
    +		  ;; really would like to do is use the target as the
    
    859
    +		  ;; temp whenever it's not also the source
    
    860
    +		  (unless (location= x y)
    
    861
    +		    (inst ,mov y x))
    
    862
    +		  ,@body))))
    
    863
    +  (frob (%negate/double-float %negate movsd double-reg double-float)
    
    864
    +	(inst psllq tmp 63)		; tmp = #x8000000000000000
    
    865
    +	(inst xorpd y tmp))
    
    866
    +  (frob (%negate/single-float %negate movss single-reg single-float)
    
    867
    +	(inst pslld tmp 31)		; tmp = #x80000000
    
    868
    +	(inst xorps y tmp))
    
    869
    +  (frob (abs/double-float abs  movsd double-reg double-float)
    
    870
    +	(inst psrlq tmp 1)		; tmp = #x7fffffffffffffff
    
    871
    +	(inst andpd y tmp))
    
    872
    +  (frob (abs/single-float abs movss single-reg single-float)
    
    873
    +	(inst psrld tmp 1)		; tmp = #x7fffffff
    
    874
    +	(inst andps y tmp)))
    
    875
    +
    
    876
    +
    
    877
    +;;;; Comparison:
    
    878
    +
    
    879
    +#+long-float
    
    880
    +(deftransform eql ((x y) (long-float long-float))
    
    881
    +  `(and (= (long-float-low-bits x) (long-float-low-bits y))
    
    882
    +	(= (long-float-high-bits x) (long-float-high-bits y))
    
    883
    +	(= (long-float-exp-bits x) (long-float-exp-bits y))))
    
    884
    +
    
    885
    +#+double-double
    
    886
    +(deftransform eql ((x y) (double-double-float double-double-float))
    
    887
    +  '(and (eql (double-double-hi x) (double-double-hi y))
    
    888
    +	(eql (double-double-lo x) (double-double-lo y))))
    
    889
    +
    
    890
    +
    
    891
    +;;;; comparison
    
    892
    +
    
    893
    +(define-vop (float-compare)
    
    894
    +  (:conditional)
    
    895
    +  (:info target not-p)
    
    896
    +  (:policy :fast-safe)
    
    897
    +  (:vop-var vop)
    
    898
    +  (:save-p :compute-only)
    
    899
    +  (:note _N"inline float comparison"))
    
    900
    +
    
    901
    +;;; comiss and comisd can cope with one or other arg in memory: we
    
    902
    +;;; could (should, indeed) extend these to cope with descriptor args
    
    903
    +;;; and stack args
    
    904
    +
    
    905
    +(define-vop (single-float-compare float-compare)
    
    906
    +  (:args (x :scs (single-reg)) (y :scs (single-reg descriptor-reg)))
    
    907
    +  (:conditional)
    
    908
    +  (:arg-types single-float single-float))
    
    909
    +(define-vop (double-float-compare float-compare)
    
    910
    +  (:args (x :scs (double-reg)) (y :scs (double-reg descriptor-reg)))
    
    911
    +  (:conditional)
    
    912
    +  (:arg-types double-float double-float))
    
    913
    +
    
    914
    +(define-vop (=/single-float single-float-compare)
    
    915
    +    (:translate =)
    
    916
    +  (:info target not-p)
    
    917
    +  (:vop-var vop)
    
    918
    +  (:generator 3
    
    919
    +    (note-this-location vop :internal-error)
    
    920
    +    (sc-case y
    
    921
    +      (single-reg
    
    922
    +       (inst ucomiss x y))
    
    923
    +      (descriptor-reg
    
    924
    +       (inst ucomiss x (ea-for-sf-desc y))))
    
    925
    +    ;; if PF&CF, there was a NaN involved => not equal
    
    926
    +    ;; otherwise, ZF => equal
    
    927
    +    (cond (not-p
    
    928
    +           (inst jmp :p target)
    
    929
    +           (inst jmp :ne target))
    
    930
    +          (t
    
    931
    +           (let ((not-lab (gen-label)))
    
    932
    +             (inst jmp :p not-lab)
    
    933
    +             (inst jmp :e target)
    
    934
    +             (emit-label not-lab))))))
    
    935
    +
    
    936
    +(define-vop (=/double-float double-float-compare)
    
    937
    +    (:translate =)
    
    938
    +  (:info target not-p)
    
    939
    +  (:vop-var vop)
    
    940
    +  (:generator 3
    
    941
    +    (note-this-location vop :internal-error)
    
    942
    +    (sc-case y
    
    943
    +      (double-reg
    
    944
    +       (inst ucomisd x y))
    
    945
    +      (descriptor-reg
    
    946
    +       (inst ucomisd x (ea-for-df-desc y))))
    
    947
    +    (cond (not-p
    
    948
    +           (inst jmp :p target)
    
    949
    +           (inst jmp :ne target))
    
    950
    +          (t
    
    951
    +           (let ((not-lab (gen-label)))
    
    952
    +             (inst jmp :p not-lab)
    
    953
    +             (inst jmp :e target)
    
    954
    +             (emit-label not-lab))))))
    
    955
    +
    
    956
    +(define-vop (</double-float double-float-compare)
    
    957
    +  (:translate <)
    
    958
    +  (:info target not-p)
    
    959
    +  (:generator 3
    
    960
    +    (sc-case y
    
    961
    +      (double-reg
    
    962
    +       (inst comisd x y))
    
    963
    +      (descriptor-reg
    
    964
    +       (inst comisd x (ea-for-df-desc y))))
    
    965
    +    (cond (not-p
    
    966
    +           (inst jmp :p target)
    
    967
    +           (inst jmp :nc target))
    
    968
    +          (t
    
    969
    +           (let ((not-lab (gen-label)))
    
    970
    +             (inst jmp :p not-lab)
    
    971
    +             (inst jmp :c target)
    
    972
    +             (emit-label not-lab))))))
    
    973
    +
    
    974
    +(define-vop (</single-float single-float-compare)
    
    975
    +  (:translate <)
    
    976
    +  (:info target not-p)
    
    977
    +  (:generator 3
    
    978
    +    (sc-case y
    
    979
    +      (single-reg
    
    980
    +       (inst comiss x y))
    
    981
    +      (descriptor-reg
    
    982
    +       (inst comiss x (ea-for-sf-desc y))))
    
    983
    +    (cond (not-p
    
    984
    +           (inst jmp :p target)
    
    985
    +           (inst jmp :nc target))
    
    986
    +          (t
    
    987
    +           (let ((not-lab (gen-label)))
    
    988
    +             (inst jmp :p not-lab)
    
    989
    +             (inst jmp :c target)
    
    990
    +             (emit-label not-lab))))))
    
    991
    +
    
    992
    +(define-vop (>/double-float double-float-compare)
    
    993
    +  (:translate >)
    
    994
    +  (:info target not-p)
    
    995
    +  (:generator 3
    
    996
    +    (sc-case y
    
    997
    +      (double-reg
    
    998
    +       (inst comisd x y))
    
    999
    +      (descriptor-reg
    
    1000
    +       (inst comisd x (ea-for-df-desc y))))
    
    1001
    +    (cond (not-p
    
    1002
    +           (inst jmp :p target)
    
    1003
    +           (inst jmp :na target))
    
    1004
    +          (t
    
    1005
    +           (let ((not-lab (gen-label)))
    
    1006
    +             (inst jmp :p not-lab)
    
    1007
    +             (inst jmp :a target)
    
    1008
    +             (emit-label not-lab))))))
    
    1009
    +
    
    1010
    +(define-vop (>/single-float single-float-compare)
    
    1011
    +  (:translate >)
    
    1012
    +  (:info target not-p)
    
    1013
    +  (:generator 3
    
    1014
    +    (sc-case y
    
    1015
    +      (single-reg
    
    1016
    +       (inst comiss x y))
    
    1017
    +      (descriptor-reg
    
    1018
    +       (inst comiss x (ea-for-sf-desc y))))
    
    1019
    +    (cond (not-p
    
    1020
    +           (inst jmp :p target)
    
    1021
    +           (inst jmp :na target))
    
    1022
    +          (t
    
    1023
    +           (let ((not-lab (gen-label)))
    
    1024
    +             (inst jmp :p not-lab)
    
    1025
    +             (inst jmp :a target)
    
    1026
    +             (emit-label not-lab))))))
    
    1027
    +
    
    1028
    +
    
    1029
    +
    
    1030
    +;;;; Conversion:
    
    1031
    +
    
    1032
    +(macrolet ((frob (name translate inst to-sc to-type)
    
    1033
    +             `(define-vop (,name)
    
    1034
    +                (:args (x :scs (signed-stack signed-reg)))
    
    1035
    +                (:results (y :scs (,to-sc)))
    
    1036
    +                (:arg-types signed-num)
    
    1037
    +                (:result-types ,to-type)
    
    1038
    +                (:policy :fast-safe)
    
    1039
    +                (:note _N"inline float coercion")
    
    1040
    +                (:translate ,translate)
    
    1041
    +                (:vop-var vop)
    
    1042
    +                (:save-p :compute-only)
    
    1043
    +                (:generator 5
    
    1044
    +                  (sc-case x
    
    1045
    +                    (signed-reg
    
    1046
    +                     (note-this-location vop :internal-error)
    
    1047
    +                     (inst ,inst y x))
    
    1048
    +                    (signed-stack
    
    1049
    +                     (note-this-location vop :internal-error)
    
    1050
    +                     (inst ,inst y x)))))))
    
    1051
    +  (frob %single-float/signed %single-float cvtsi2ss single-reg single-float)
    
    1052
    +  (frob %double-float/signed %double-float cvtsi2sd double-reg double-float))
    
    1053
    +
    
    1054
    +(macrolet ((frob (name translate inst from-sc from-type to-sc to-type)
    
    1055
    +	     (let ((ea (if (eq from-sc 'single-reg)
    
    1056
    +			   'ea-for-sf-desc
    
    1057
    +			   'ea-for-df-desc)))
    
    1058
    +	       `(define-vop (,name)
    
    1059
    +		  (:args (x :scs (,from-sc descriptor-reg) :target y))
    
    1060
    +		  (:results (y :scs (,to-sc)))
    
    1061
    +		  (:arg-types ,from-type)
    
    1062
    +		  (:result-types ,to-type)
    
    1063
    +		  (:policy :fast-safe)
    
    1064
    +		  (:note _N"inline float coercion")
    
    1065
    +		  (:translate ,translate)
    
    1066
    +		  (:vop-var vop)
    
    1067
    +		  (:save-p :compute-only)
    
    1068
    +		  (:generator 2
    
    1069
    +		    (note-this-location vop :internal-error)
    
    1070
    +		    (sc-case x
    
    1071
    +		      (,from-sc
    
    1072
    +		       (inst ,inst y x))
    
    1073
    +		      (descriptor-reg
    
    1074
    +		       (inst ,inst y (,ea x)))))))))
    
    1075
    +  (frob %single-float/double-float %single-float cvtsd2ss double-reg
    
    1076
    +	double-float single-reg single-float)
    
    1077
    +
    
    1078
    +  (frob %double-float/single-float %double-float cvtss2sd
    
    1079
    +	single-reg single-float double-reg double-float))
    
    1080
    +
    
    1081
    +(macrolet ((frob (trans inst from-sc from-type round-p)
    
    1082
    +             (declare (ignore round-p))
    
    1083
    +	     (let ((ea (if (eq from-sc 'single-reg)
    
    1084
    +			   'ea-for-sf-desc
    
    1085
    +			   'ea-for-df-desc)))
    
    1086
    +	       `(define-vop (,(symbolicate trans "/" from-type))
    
    1087
    +		  (:args (x :scs (,from-sc descriptor-reg)))
    
    1088
    +		  (:temporary (:sc any-reg) temp-reg)
    
    1089
    +		  (:results (y :scs (signed-reg)))
    
    1090
    +		  (:arg-types ,from-type)
    
    1091
    +		  (:result-types signed-num)
    
    1092
    +		  (:translate ,trans)
    
    1093
    +		  (:policy :fast-safe)
    
    1094
    +		  (:note _N"inline float truncate")
    
    1095
    +		  (:vop-var vop)
    
    1096
    +		  (:save-p :compute-only)
    
    1097
    +		  (:generator 5
    
    1098
    +		    (sc-case y
    
    1099
    +		      (signed-stack
    
    1100
    +		       (sc-case x
    
    1101
    +			 (,from-sc
    
    1102
    +			  (inst ,inst temp-reg x))
    
    1103
    +			 (descriptor-reg
    
    1104
    +			  (inst ,inst temp-reg (,ea x))))
    
    1105
    +		       (move y temp-reg))
    
    1106
    +		      (signed-reg
    
    1107
    +		       (sc-case x
    
    1108
    +			 (,from-sc
    
    1109
    +			  (inst ,inst y x))
    
    1110
    +			 (descriptor-reg
    
    1111
    +			  (inst ,inst y (,ea x)))))))))))
    
    1112
    +  (frob %unary-truncate cvttss2si single-reg single-float nil)
    
    1113
    +  (frob %unary-truncate cvttsd2si double-reg double-float nil)
    
    1114
    +
    
    1115
    +  (frob %unary-round cvtss2si single-reg single-float t)
    
    1116
    +  (frob %unary-round cvtsd2si double-reg double-float t))
    
    1117
    +
    
    1118
    +(define-vop (fast-unary-ftruncate/single-float)
    
    1119
    +  (:args (x :scs (single-reg descriptor-reg)))
    
    1120
    +  (:arg-types single-float)
    
    1121
    +  (:results (r :scs (single-reg)))
    
    1122
    +  (:result-types single-float)
    
    1123
    +  (:policy :fast-safe)
    
    1124
    +  (:translate c::fast-unary-ftruncate)
    
    1125
    +  (:temporary (:sc signed-reg) temp)
    
    1126
    +  (:note _N"inline ftruncate")
    
    1127
    +  (:generator 2
    
    1128
    +    (sc-case x
    
    1129
    +      (single-reg
    
    1130
    +       (inst cvttss2si temp x))
    
    1131
    +      (descriptor-reg
    
    1132
    +       (inst cvttss2si temp (ea-for-sf-desc x))))
    
    1133
    +    (inst cvtsi2ss r temp)))
    
    1134
    +
    
    1135
    +(define-vop (fast-unary-ftruncate/double-float)
    
    1136
    +  (:args (x :scs (double-reg descriptor-reg) :target r))
    
    1137
    +  (:arg-types double-float)
    
    1138
    +  (:results (r :scs (double-reg)))
    
    1139
    +  (:result-types double-float)
    
    1140
    +  (:policy :fast-safe)
    
    1141
    +  (:translate c::fast-unary-ftruncate)
    
    1142
    +  (:temporary (:sc signed-reg) temp)
    
    1143
    +  (:note _N"inline ftruncate")
    
    1144
    +  (:generator 2
    
    1145
    +    (sc-case x
    
    1146
    +      (double-reg
    
    1147
    +       (inst cvttsd2si temp x))
    
    1148
    +      (descriptor-reg
    
    1149
    +       (inst cvttsd2si temp (ea-for-df-desc x))))
    
    1150
    +    (inst cvtsi2sd r temp)))
    
    1151
    +
    
    1152
    +(define-vop (make-single-float)
    
    1153
    +  (:args (bits :scs (signed-reg) :target res
    
    1154
    +               :load-if (not (or (and (sc-is bits signed-stack)
    
    1155
    +                                      (sc-is res single-reg))
    
    1156
    +                                 (and (sc-is bits signed-stack)
    
    1157
    +                                      (sc-is res single-stack)
    
    1158
    +                                      (location= bits res))))))
    
    1159
    +  (:results (res :scs (single-reg single-stack)))
    
    1160
    +  (:arg-types signed-num)
    
    1161
    +  (:result-types single-float)
    
    1162
    +  (:translate make-single-float)
    
    1163
    +  (:policy :fast-safe)
    
    1164
    +  (:vop-var vop)
    
    1165
    +  (:generator 4
    
    1166
    +    (sc-case res
    
    1167
    +       (single-stack
    
    1168
    +        (sc-case bits
    
    1169
    +          (signed-reg
    
    1170
    +           (inst mov res bits))
    
    1171
    +          (signed-stack
    
    1172
    +           (assert (location= bits res)))))
    
    1173
    +       (single-reg
    
    1174
    +        (sc-case bits
    
    1175
    +          (signed-reg
    
    1176
    +           (inst movd res bits))
    
    1177
    +          (signed-stack
    
    1178
    +           (inst movd res bits)))))))
    
    1179
    +
    
    1180
    +(define-vop (make-double-float)
    
    1181
    +  (:args (hi-bits :scs (signed-reg))
    
    1182
    +	 (lo-bits :scs (unsigned-reg)))
    
    1183
    +  (:results (res :scs (double-reg)))
    
    1184
    +  (:arg-types signed-num unsigned-num)
    
    1185
    +  (:result-types double-float)
    
    1186
    +  (:translate make-double-float)
    
    1187
    +  (:temporary (:sc double-reg) temp)
    
    1188
    +  (:policy :fast-safe)
    
    1189
    +  (:vop-var vop)
    
    1190
    +  (:generator 4
    
    1191
    +    (inst movd temp hi-bits)
    
    1192
    +    (inst psllq temp 32)
    
    1193
    +    (inst movd res lo-bits)
    
    1194
    +    (inst orpd res temp)))
    
    1195
    +
    
    1196
    +(define-vop (single-float-bits)
    
    1197
    +  (:args (float :scs (single-reg descriptor-reg)
    
    1198
    +                :load-if (not (sc-is float single-stack))))
    
    1199
    +  (:results (bits :scs (signed-reg)))
    
    1200
    +  (:arg-types single-float)
    
    1201
    +  (:result-types signed-num)
    
    1202
    +  (:translate single-float-bits)
    
    1203
    +  (:policy :fast-safe)
    
    1204
    +  (:vop-var vop)
    
    1205
    +  (:generator 4
    
    1206
    +    (sc-case bits
    
    1207
    +      (signed-reg
    
    1208
    +       (sc-case float
    
    1209
    +         (single-reg
    
    1210
    +	  (inst movd bits float))
    
    1211
    +         (single-stack
    
    1212
    +          (move bits float))
    
    1213
    +         (descriptor-reg
    
    1214
    +	  (loadw
    
    1215
    +	   bits float vm:single-float-value-slot vm:other-pointer-type))))
    
    1216
    +      (signed-stack
    
    1217
    +       (sc-case float
    
    1218
    +         (single-reg
    
    1219
    +          (inst movss bits float)))))))
    
    1220
    +
    
    1221
    +(define-vop (double-float-high-bits)
    
    1222
    +  (:args (float :scs (double-reg descriptor-reg)
    
    1223
    +                :load-if (not (sc-is float double-stack))))
    
    1224
    +  (:results (hi-bits :scs (signed-reg)))
    
    1225
    +  (:temporary (:sc double-reg) temp)
    
    1226
    +  (:arg-types double-float)
    
    1227
    +  (:result-types signed-num)
    
    1228
    +  (:translate double-float-high-bits)
    
    1229
    +  (:policy :fast-safe)
    
    1230
    +  (:vop-var vop)
    
    1231
    +  (:generator 5
    
    1232
    +     (sc-case float
    
    1233
    +       (double-reg
    
    1234
    +	(inst movq temp float)
    
    1235
    +	(inst psrlq temp 32)
    
    1236
    +	(inst movd hi-bits temp))
    
    1237
    +       (double-stack
    
    1238
    +        (loadw hi-bits ebp-tn (- (1+ (tn-offset float)))))
    
    1239
    +       (descriptor-reg
    
    1240
    +        (loadw hi-bits float (1+ double-float-value-slot)
    
    1241
    +               vm:other-pointer-type)))))
    
    1242
    +
    
    1243
    +(define-vop (double-float-low-bits)
    
    1244
    +  (:args (float :scs (double-reg descriptor-reg)
    
    1245
    +                :load-if (not (sc-is float double-stack))))
    
    1246
    +  (:results (lo-bits :scs (unsigned-reg)))
    
    1247
    +  (:arg-types double-float)
    
    1248
    +  (:result-types unsigned-num)
    
    1249
    +  (:translate double-float-low-bits)
    
    1250
    +  (:policy :fast-safe)
    
    1251
    +  (:vop-var vop)
    
    1252
    +  (:generator 5
    
    1253
    +     (sc-case float
    
    1254
    +       (double-reg
    
    1255
    +	(inst movd lo-bits float))
    
    1256
    +       (double-stack
    
    1257
    +        (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float)))))
    
    1258
    +       (descriptor-reg
    
    1259
    +        (loadw lo-bits float vm:double-float-value-slot
    
    1260
    +	       vm:other-pointer-type)))))
    
    1261
    +
    
    1262
    +(define-vop (double-float-bits)
    
    1263
    +  (:args (float :scs (double-reg descriptor-reg)
    
    1264
    +		:load-if (not (sc-is float double-stack))
    
    1265
    +		:to (:result 1)))
    
    1266
    +  (:results (hi-bits :scs (signed-reg))
    
    1267
    +	    (lo-bits :scs (unsigned-reg)))
    
    1268
    +  (:arg-types double-float)
    
    1269
    +  (:result-types signed-num unsigned-num)
    
    1270
    +  (:temporary (:sc double-reg) temp)
    
    1271
    +  (:translate kernel::double-float-bits)
    
    1272
    +  (:policy :fast-safe)
    
    1273
    +  (:vop-var vop)
    
    1274
    +  (:generator 5
    
    1275
    +    (sc-case float
    
    1276
    +      (double-reg
    
    1277
    +        (inst movq temp float)
    
    1278
    +	(inst movd lo-bits temp)
    
    1279
    +	(inst psrlq temp 32)
    
    1280
    +	(inst movd hi-bits temp))
    
    1281
    +      (double-stack
    
    1282
    +       (loadw hi-bits ebp-tn (- (+ 1 (tn-offset float))))
    
    1283
    +       (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float)))))
    
    1284
    +      (descriptor-reg
    
    1285
    +       (loadw hi-bits float (1+ double-float-value-slot)
    
    1286
    +	   vm:other-pointer-type)
    
    1287
    +       (loadw lo-bits float vm:double-float-value-slot
    
    1288
    +	       vm:other-pointer-type)))))
    
    1289
    +
    
    1290
    +
    
    1291
    +;;;; Float mode hackery:
    
    1292
    +
    
    1293
    +(deftype float-modes () '(unsigned-byte 24))
    
    1294
    +
    
    1295
    +;; For the record, here is the format of the MXCSR register.
    
    1296
    +;;
    
    1297
    +;; Bit
    
    1298
    +;; 31-16      Reserved
    
    1299
    +;; 15         Flush to zero
    
    1300
    +;; 14-13      Rounding control
    
    1301
    +;; 12         precision mask (inexact)
    
    1302
    +;; 11         underflow mask
    
    1303
    +;; 10         overflow mask
    
    1304
    +;;  9         divide-by-zero mask
    
    1305
    +;;  8         denormal operation mask
    
    1306
    +;;  7         invalid operation mask
    
    1307
    +;;  6         denormals-are-zeros
    
    1308
    +;;  5         precision flag (inexact)
    
    1309
    +;;  4         underflow flag
    
    1310
    +;;  3         overflow flag
    
    1311
    +;;  2         divide-by-zero flag
    
    1312
    +;;  1         denormal operation flag
    
    1313
    +;;  0         invalid operation flag
    
    1314
    +;;
    
    1315
    +;; See below for rounding control
    
    1316
    +(defknown sse2-floating-point-modes () float-modes (flushable))
    
    1317
    +(defknown ((setf sse2-floating-point-modes)) (float-modes) float-modes)
    
    1318
    +
    
    1319
    +;; Returns exactly the mxcsr register, except the masks are flipped
    
    1320
    +;; because we want exception enable flags, not masks.
    
    1321
    +(define-vop (sse2-floating-point-modes)
    
    1322
    +  (:results (result :scs (unsigned-reg)))
    
    1323
    +  (:result-types unsigned-num)
    
    1324
    +  (:translate sse2-floating-point-modes)
    
    1325
    +  (:policy :fast-safe)
    
    1326
    +  (:temporary (:sc unsigned-stack) temp)
    
    1327
    +  (:generator 3
    
    1328
    +    (inst stmxcsr temp)
    
    1329
    +    (inst mov result temp)
    
    1330
    +    (inst xor result (ash #x3f 7))))
    
    1331
    +
    
    1332
    +;; Set mxcsr exactly to whatever is given, except we invert the
    
    1333
    +;; exception enable flags to make them match the exception mask flags.
    
    1334
    +(define-vop (set-sse2-floating-point-modes)
    
    1335
    +  (:args (new :scs (unsigned-reg) :to :result :target res))
    
    1336
    +  (:arg-types unsigned-num)
    
    1337
    +  (:results (res :scs (unsigned-reg)))
    
    1338
    +  (:result-types unsigned-num)
    
    1339
    +  (:translate (setf sse2-floating-point-modes))
    
    1340
    +  (:policy :fast-safe)
    
    1341
    +  (:temporary (:sc unsigned-stack) cw-stack)
    
    1342
    +  (:temporary (:sc unsigned-reg) temp)
    
    1343
    +  (:generator 8
    
    1344
    +    ;; The high 16 bits are reserved and will cause a segfault if set,
    
    1345
    +    ;; so clear out those bits.
    
    1346
    +    (inst mov temp new)
    
    1347
    +    (inst and temp #xffff)
    
    1348
    +    (inst xor temp (ash #x3f 7))	; Convert enables to masks
    
    1349
    +    (inst mov cw-stack temp)
    
    1350
    +    (inst ldmxcsr cw-stack)
    
    1351
    +    (move res new)))
    
    1352
    +
    
    1353
    +;; For the record here is the format of the x87 control and status
    
    1354
    +;; words:
    
    1355
    +;;
    
    1356
    +;; Status word:
    
    1357
    +;;
    
    1358
    +;; Bit
    
    1359
    +;; 15         FPU Busy
    
    1360
    +;; 14         Condition code C3
    
    1361
    +;; 13-11      top of stack
    
    1362
    +;; 10         Condition code C2
    
    1363
    +;;  9         Condition code C1
    
    1364
    +;;  8         Condition code C0
    
    1365
    +;;  7         Error summary status
    
    1366
    +;;  6         Stack fault
    
    1367
    +;;  5         precision flag (inexact)
    
    1368
    +;;  4         underflow flag
    
    1369
    +;;  3         overflow flag
    
    1370
    +;;  2         divide-by-zero flag
    
    1371
    +;;  1         denormal operation flag
    
    1372
    +;;  0         invalid operation flag
    
    1373
    +;;
    
    1374
    +;; Control word
    
    1375
    +;;
    
    1376
    +;; Bit
    
    1377
    +;; 15-13      Reserved
    
    1378
    +;; 12         Infinity control
    
    1379
    +;; 11-10      Rounding control
    
    1380
    +;;  9-8       Precision control
    
    1381
    +;;  7-6       Reserved
    
    1382
    +;;  5         precision mask (inexact)
    
    1383
    +;;  4         underflow mask
    
    1384
    +;;  3         overflow mask
    
    1385
    +;;  2         divide-by-zero mask
    
    1386
    +;;  1         denormal operation mask
    
    1387
    +;;  0         invalid operation mask
    
    1388
    +;;
    
    1389
    +;; Round control:
    
    1390
    +;;
    
    1391
    +;; 00   nearest
    
    1392
    +;; 01   negative infinity
    
    1393
    +;; 10   positive infinity
    
    1394
    +;; 11   zero (truncate)
    
    1395
    +;;
    
    1396
    +;; Precision control
    
    1397
    +;;
    
    1398
    +;; 00   single precision (24 bits)
    
    1399
    +;; 01   reserved
    
    1400
    +;; 10   double precision (53 bits)
    
    1401
    +;; 11   double extended precision (64 bits)
    
    1402
    +
    
    1403
    +(defknown x87-floating-point-modes () float-modes (flushable))
    
    1404
    +(defknown ((setf x87-floating-point-modes)) (float-modes)
    
    1405
    +  float-modes)
    
    1406
    +
    
    1407
    +;; Extract the control and status words from the FPU.  The low 16 bits
    
    1408
    +;; contain the control word, and the high 16 bits contain the status.
    
    1409
    +(define-vop (x87-floating-point-modes)
    
    1410
    +  (:results (res :scs (unsigned-reg)))
    
    1411
    +  (:result-types unsigned-num)
    
    1412
    +  (:translate x87-floating-point-modes)
    
    1413
    +  (:policy :fast-safe)
    
    1414
    +  (:temporary (:sc unsigned-stack) cw-stack)
    
    1415
    +  (:temporary (:sc unsigned-reg :offset eax-offset) sw-reg)
    
    1416
    +  (:generator 8
    
    1417
    +   (inst fnstsw)
    
    1418
    +   (inst fnstcw cw-stack)
    
    1419
    +   (inst and sw-reg #xff)		; mask exception flags
    
    1420
    +   (inst shl sw-reg 16)
    
    1421
    +   (inst byte #x66)			; operand size prefix
    
    1422
    +   (inst or sw-reg cw-stack)
    
    1423
    +   (inst xor sw-reg #x3f)		; invert exception mask
    
    1424
    +   (move res sw-reg)))
    
    1425
    +
    
    1426
    +;; Set the control and status words from the FPU.  The low 16 bits
    
    1427
    +;; contain the control word, and the high 16 bits contain the status.
    
    1428
    +(define-vop (x87-set-floating-point-modes)
    
    1429
    +  (:args (new :scs (unsigned-reg) :to :result :target res))
    
    1430
    +  (:results (res :scs (unsigned-reg)))
    
    1431
    +  (:arg-types unsigned-num)
    
    1432
    +  (:result-types unsigned-num)
    
    1433
    +  (:translate (setf x87-floating-point-modes))
    
    1434
    +  (:policy :fast-safe)
    
    1435
    +  (:temporary (:sc unsigned-stack) cw-stack)
    
    1436
    +  (:temporary (:sc byte-reg :offset al-offset) sw-reg)
    
    1437
    +  (:temporary (:sc unsigned-reg :offset ecx-offset) old)
    
    1438
    +  (:generator 6
    
    1439
    +   (inst mov cw-stack new)
    
    1440
    +   (inst xor cw-stack #x3f)  ; invert exception mask
    
    1441
    +   (inst fnstsw)
    
    1442
    +   (inst fldcw cw-stack)  ; always update the control word
    
    1443
    +   (inst mov old new)
    
    1444
    +   (inst shr old 16)
    
    1445
    +   (inst cmp cl-tn sw-reg)  ; compare exception flags
    
    1446
    +   (inst jmp :z DONE)  ; skip updating the status word
    
    1447
    +   (inst sub esp-tn 28)
    
    1448
    +   (inst fstenv (make-ea :dword :base esp-tn))
    
    1449
    +   (inst mov (make-ea :byte :base esp-tn :disp 4) cl-tn)
    
    1450
    +   (inst fldenv (make-ea :dword :base esp-tn))
    
    1451
    +   (inst add esp-tn 28)
    
    1452
    +   DONE
    
    1453
    +   (move res new)))
    
    1454
    +
    
    1455
    +
    
    1456
    +(defun sse2-floating-point-modes ()
    
    1457
    +  (sse2-floating-point-modes))
    
    1458
    +(defun (setf sse2-floating-point-modes) (new)
    
    1459
    +  (setf (sse2-floating-point-modes) new))
    
    1460
    +
    
    1461
    +(defun x87-floating-point-modes ()
    
    1462
    +  (x87-floating-point-modes))
    
    1463
    +(defun (setf x87-floating-point-modes) (new)
    
    1464
    +  (setf (x87-floating-point-modes) new))
    
    1465
    +
    
    1466
    +
    
    1467
    +;;;; Complex float VOPs
    
    1468
    +(define-vop (make-complex-single-float)
    
    1469
    +  (:translate complex)
    
    1470
    +  (:args (real :scs (single-reg) :to :save)
    
    1471
    +	 (imag :scs (single-reg) :to :save))
    
    1472
    +  (:arg-types single-float single-float)
    
    1473
    +  (:results (r :scs (complex-single-reg) :from (:argument 0)
    
    1474
    +	       :load-if (not (sc-is r complex-single-stack))))
    
    1475
    +  (:result-types complex-single-float)
    
    1476
    +  (:temporary (:sc complex-single-reg) temp)
    
    1477
    +  (:note _N"inline complex single-float creation")
    
    1478
    +  (:policy :fast-safe)
    
    1479
    +  (:generator 5
    
    1480
    +    (sc-case r
    
    1481
    +      (complex-single-reg
    
    1482
    +       ;; x = a + b*i = b|a
    
    1483
    +       (inst xorps temp temp)		; temp = 0|0|0|0
    
    1484
    +       (inst movss temp real)		; temp = 0|0|0|a
    
    1485
    +       (inst unpcklps temp imag)	; temp = 0|0|b|a
    
    1486
    +       (inst movaps r temp))
    
    1487
    +      (complex-single-stack
    
    1488
    +       (inst movss (ea-for-csf-real-stack r) real)
    
    1489
    +       (inst movss (ea-for-csf-imag-stack r) imag)))))
    
    1490
    +
    
    1491
    +(define-vop (make-complex-double-float)
    
    1492
    +  (:translate complex)
    
    1493
    +  (:args (real :scs (double-reg) :to :save)
    
    1494
    +	 (imag :scs (double-reg) :to :save))
    
    1495
    +  (:arg-types double-float double-float)
    
    1496
    +  (:results (r :scs (complex-double-reg) :from (:argument 0)
    
    1497
    +	       :load-if (not (sc-is r complex-double-stack))))
    
    1498
    +  (:result-types complex-double-float)
    
    1499
    +  (:temporary (:sc complex-double-reg) temp)
    
    1500
    +  (:note _N"inline complex double-float creation")
    
    1501
    +  (:policy :fast-safe)
    
    1502
    +  (:generator 5
    
    1503
    +    (sc-case r
    
    1504
    +      (complex-double-reg
    
    1505
    +       ;; x = a + b*i = b|a
    
    1506
    +       (inst movsd temp real)		; temp = ?|a
    
    1507
    +       (inst unpcklpd temp imag)	; temp = b|a
    
    1508
    +       (inst movapd r temp))
    
    1509
    +      (complex-double-stack
    
    1510
    +       (inst movsd (ea-for-cdf-real-stack r) real)
    
    1511
    +       (inst movsd (ea-for-cdf-imag-stack r) imag)))))
    
    1512
    +
    
    1513
    +(define-vop (realpart/complex-single-float)
    
    1514
    +  (:translate realpart)
    
    1515
    +  (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg)))
    
    1516
    +  (:arg-types complex-single-float)
    
    1517
    +  (:results (r :scs (single-reg)))
    
    1518
    +  (:result-types single-float)
    
    1519
    +  (:temporary (:sc single-reg) temp)
    
    1520
    +  (:policy :fast-safe)
    
    1521
    +  (:note _N"complex float realpart")
    
    1522
    +  (:generator 3
    
    1523
    +    (sc-case x
    
    1524
    +      (complex-single-reg
    
    1525
    +       (cond ((location= r x)
    
    1526
    +	      (inst xorps temp temp)	; temp = 0|0|0|0
    
    1527
    +	      (inst movss temp x)	; temp = 0|0|0|x
    
    1528
    +	      (inst movss r temp))	; r = temp
    
    1529
    +	     (t
    
    1530
    +	      (inst xorps r r)		; temp = 0|0|0|0
    
    1531
    +	      (inst movss r x))))	; r = 0|0|0|x
    
    1532
    +      (complex-single-stack
    
    1533
    +       (inst movss r (ea-for-csf-real-stack x)))
    
    1534
    +      (descriptor-reg
    
    1535
    +       (inst movss r (ea-for-csf-real-desc x))))))
    
    1536
    +
    
    1537
    +(define-vop (realpart/complex-double-float)
    
    1538
    +  (:translate realpart)
    
    1539
    +  (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg)))
    
    1540
    +  (:arg-types complex-double-float)
    
    1541
    +  (:results (r :scs (double-reg)))
    
    1542
    +  (:result-types double-float)
    
    1543
    +  (:temporary (:sc double-reg) temp)
    
    1544
    +  (:policy :fast-safe)
    
    1545
    +  (:note "complex float realpart")
    
    1546
    +  (:generator 3
    
    1547
    +    (sc-case x
    
    1548
    +      (complex-double-reg
    
    1549
    +       (cond ((location= r x)
    
    1550
    +	      (inst xorpd temp temp)	; temp = 0|0
    
    1551
    +	      (inst movsd temp x)	; temp = 0|x
    
    1552
    +	      (inst movsd r temp))	; r = temp
    
    1553
    +	     (t
    
    1554
    +	      (inst xorpd r r)		; r = 0|0
    
    1555
    +	      (inst movsd r x))))	; r = 0|x
    
    1556
    +      (complex-double-stack
    
    1557
    +       (inst movsd r (ea-for-cdf-real-stack x)))
    
    1558
    +      (descriptor-reg
    
    1559
    +       (inst movsd r (ea-for-cdf-real-desc x))))))
    
    1560
    +
    
    1561
    +(define-vop (imagpart/complex-single-float)
    
    1562
    +  (:translate imagpart)
    
    1563
    +  (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg)))
    
    1564
    +  (:arg-types complex-single-float)
    
    1565
    +  (:results (r :scs (single-reg)))
    
    1566
    +  (:result-types single-float)
    
    1567
    +  (:temporary (:sc complex-single-reg) temp)
    
    1568
    +  (:policy :fast-safe)
    
    1569
    +  (:note _N"complex float imagpart")
    
    1570
    +  (:generator 3
    
    1571
    +    (sc-case x
    
    1572
    +      (complex-single-reg
    
    1573
    +       ;; x = a+b*i = b|a
    
    1574
    +       ;; Get the imag part to the low part of temp.  We don't care about
    
    1575
    +       ;; the other parts of r.
    
    1576
    +       (inst movaps temp x)		; temp = u|u|b|a
    
    1577
    +       (inst shufps temp x #b01)	; temp = a|a|a|b
    
    1578
    +       (inst xorps r r)			; r = 0|0|0|0
    
    1579
    +       (inst movss r temp)		; r = 0|0|0|b
    
    1580
    +       )
    
    1581
    +      (complex-single-stack
    
    1582
    +       (inst movss r (ea-for-csf-imag-stack x)))
    
    1583
    +      (descriptor-reg
    
    1584
    +       (inst movss r (ea-for-csf-imag-desc x))))))
    
    1585
    +
    
    1586
    +(define-vop (imagpart/complex-double-float)
    
    1587
    +  (:translate imagpart)
    
    1588
    +  (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg)))
    
    1589
    +  (:arg-types complex-double-float)
    
    1590
    +  (:results (r :scs (double-reg)))
    
    1591
    +  (:result-types double-float)
    
    1592
    +  (:temporary (:sc complex-double-reg) temp)
    
    1593
    +  (:policy :fast-safe)
    
    1594
    +  (:note _N"complex float imagpart")
    
    1595
    +  (:generator 3
    
    1596
    +    (sc-case x
    
    1597
    +      (complex-double-reg
    
    1598
    +       (cond ((location= r x)
    
    1599
    +	      (inst xorpd temp temp)	; temp = 0|0
    
    1600
    +	      (inst movhlps temp x)	; temp = 0|b
    
    1601
    +	      (inst movsd r temp))	; r = temp
    
    1602
    +	     (t
    
    1603
    +	      (inst xorpd r r)		; r = 0|0
    
    1604
    +	      (inst movhlps r x))))	; r = 0|b
    
    1605
    +      (complex-double-stack
    
    1606
    +       (inst movsd r (ea-for-cdf-imag-stack x)))
    
    1607
    +      (descriptor-reg
    
    1608
    +       (inst movsd r (ea-for-cdf-imag-desc x))))))
    
    1609
    +
    
    1610
    +;;; A hack dummy VOP to bias the representation selection of its
    
    1611
    +;;; argument towards a FP register which can help avoid consing at
    
    1612
    +;;; inappropriate locations.
    
    1613
    +
    
    1614
    +(defknown double-float-reg-bias (double-float) (values))
    
    1615
    +;;;
    
    1616
    +(define-vop (double-float-reg-bias)
    
    1617
    +  (:translate double-float-reg-bias)
    
    1618
    +  (:args (x :scs (double-reg double-stack) :load-if nil))
    
    1619
    +  (:arg-types double-float)
    
    1620
    +  (:policy :fast-safe)
    
    1621
    +  (:note _N"inline dummy FP register bias")
    
    1622
    +  (:ignore x)
    
    1623
    +  (:generator 0))
    
    1624
    +
    
    1625
    +(defknown single-float-reg-bias (single-float) (values))
    
    1626
    +;;;
    
    1627
    +(define-vop (single-float-reg-bias)
    
    1628
    +  (:translate single-float-reg-bias)
    
    1629
    +  (:args (x :scs (single-reg single-stack) :load-if nil))
    
    1630
    +  (:arg-types single-float)
    
    1631
    +  (:policy :fast-safe)
    
    1632
    +  (:note _N"inline dummy FP register bias")
    
    1633
    +  (:ignore x)
    
    1634
    +  (:generator 0))
    
    1635
    +
    
    1636
    +;;; Support for double-double floats
    
    1637
    +
    
    1638
    +#+double-double
    
    1639
    +(progn
    
    1640
    +
    
    1641
    +(defun double-double-reg-hi-tn (x)
    
    1642
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
    
    1643
    +		  :offset (tn-offset x)))
    
    1644
    +
    
    1645
    +(defun double-double-reg-lo-tn (x)
    
    1646
    +  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
    
    1647
    +		  :offset (1+ (tn-offset x))))
    
    1648
    +
    
    1649
    +(define-move-function (load-double-double 4) (vop x y)
    
    1650
    +  ((double-double-stack) (double-double-reg))
    
    1651
    +  (let ((real-tn (double-double-reg-hi-tn y)))
    
    1652
    +    (inst movsd real-tn (ea-for-cdf-real-stack x)))
    
    1653
    +  (let ((imag-tn (double-double-reg-lo-tn y)))
    
    1654
    +    (inst movsd imag-tn (ea-for-cdf-imag-stack x))))
    
    1655
    +
    
    1656
    +(define-move-function (store-double-double 4) (vop x y)
    
    1657
    +  ((double-double-reg) (double-double-stack))
    
    1658
    +  (let ((real-tn (double-double-reg-hi-tn x)))
    
    1659
    +    (inst movsd (ea-for-cdf-real-stack y) real-tn))
    
    1660
    +  (let ((imag-tn (double-double-reg-lo-tn x)))
    
    1661
    +    (inst movsd (ea-for-cdf-imag-stack y) imag-tn)))
    
    1662
    +
    
    1663
    +;;; Double-double float register to register moves
    
    1664
    +
    
    1665
    +(define-vop (double-double-move)
    
    1666
    +  (:args (x :scs (double-double-reg)
    
    1667
    +	    :target y :load-if (not (location= x y))))
    
    1668
    +  (:results (y :scs (double-double-reg) :load-if (not (location= x y))))
    
    1669
    +  (:note _N"double-double float move")
    
    1670
    +  (:generator 0
    
    1671
    +     (unless (location= x y)
    
    1672
    +       ;; Note the double-float-regs are aligned to every second
    
    1673
    +       ;; float register so there is not need to worry about overlap.
    
    1674
    +       (let ((x-hi (double-double-reg-hi-tn x))
    
    1675
    +	     (y-hi (double-double-reg-hi-tn y)))
    
    1676
    +	 (inst movsd y-hi x-hi)
    
    1677
    +       (let ((x-lo (double-double-reg-lo-tn x))
    
    1678
    +	     (y-lo (double-double-reg-lo-tn y)))
    
    1679
    +	 (inst movsd y-lo x-lo))))))
    
    1680
    +;;;
    
    1681
    +(define-move-vop double-double-move :move
    
    1682
    +  (double-double-reg) (double-double-reg))
    
    1683
    +
    
    1684
    +;;; Move from a complex float to a descriptor register allocating a
    
    1685
    +;;; new complex float object in the process.
    
    1686
    +
    
    1687
    +(define-vop (move-from-double-double)
    
    1688
    +  (:args (x :scs (double-double-reg) :to :save))
    
    1689
    +  (:results (y :scs (descriptor-reg)))
    
    1690
    +  (:node-var node)
    
    1691
    +  (:note _N"double double float to pointer coercion")
    
    1692
    +  (:generator 13
    
    1693
    +     (with-fixed-allocation (y vm:double-double-float-type
    
    1694
    +			       vm:double-double-float-size node)
    
    1695
    +       (let ((real-tn (double-double-reg-hi-tn x)))
    
    1696
    +	 (inst movsd (ea-for-cdf-real-desc y) real-tn))
    
    1697
    +       (let ((imag-tn (double-double-reg-lo-tn x)))
    
    1698
    +	 (inst movsd (ea-for-cdf-imag-desc y) imag-tn)))))
    
    1699
    +;;;
    
    1700
    +(define-move-vop move-from-double-double :move
    
    1701
    +  (double-double-reg) (descriptor-reg))
    
    1702
    +
    
    1703
    +;;; Move from a descriptor to a double-double float register
    
    1704
    +
    
    1705
    +(define-vop (move-to-double-double)
    
    1706
    +  (:args (x :scs (descriptor-reg)))
    
    1707
    +  (:results (y :scs (double-double-reg)))
    
    1708
    +  (:note _N"pointer to double-double-float coercion")
    
    1709
    +  (:generator 2
    
    1710
    +    (let ((real-tn (double-double-reg-hi-tn y)))
    
    1711
    +      (inst movsd real-tn (ea-for-cdf-real-desc x)))
    
    1712
    +    (let ((imag-tn (double-double-reg-lo-tn y)))
    
    1713
    +      (inst movsd imag-tn (ea-for-cdf-imag-desc x)))))
    
    1714
    +
    
    1715
    +(define-move-vop move-to-double-double :move
    
    1716
    +  (descriptor-reg) (double-double-reg))
    
    1717
    +
    
    1718
    +;;; double-double float move-argument vop
    
    1719
    +
    
    1720
    +(define-vop (move-double-double-float-argument)
    
    1721
    +  (:args (x :scs (double-double-reg) :target y)
    
    1722
    +	 (fp :scs (any-reg) :load-if (not (sc-is y double-double-reg))))
    
    1723
    +  (:results (y))
    
    1724
    +  (:note _N"double double-float argument move")
    
    1725
    +  (:generator 2
    
    1726
    +    (sc-case y
    
    1727
    +      (double-double-reg
    
    1728
    +       (unless (location= x y)
    
    1729
    +	 (let ((x-real (double-double-reg-hi-tn x))
    
    1730
    +	       (y-real (double-double-reg-hi-tn y)))
    
    1731
    +	   (inst movsd y-real x-real))
    
    1732
    +	 (let ((x-imag (double-double-reg-lo-tn x))
    
    1733
    +	       (y-imag (double-double-reg-lo-tn y)))
    
    1734
    +	   (inst movsd y-imag x-imag))))
    
    1735
    +      (double-double-stack
    
    1736
    +       (let ((hi-tn (double-double-reg-hi-tn x)))
    
    1737
    +	 (inst movsd (ea-for-cdf-real-stack y fp) hi-tn))
    
    1738
    +       (let ((lo-tn (double-double-reg-lo-tn x)))
    
    1739
    +	 (inst movsd (ea-for-cdf-imag-stack y fp) lo-tn))))))
    
    1740
    +
    
    1741
    +(define-move-vop move-double-double-float-argument :move-argument
    
    1742
    +  (double-double-reg descriptor-reg) (double-double-reg))
    
    1743
    +
    
    1744
    +
    
    1745
    +(define-vop (move-to-complex-double-double)
    
    1746
    +  (:args (x :scs (descriptor-reg)))
    
    1747
    +  (:results (y :scs (complex-double-double-reg)))
    
    1748
    +  (:note _N"pointer to complex float coercion")
    
    1749
    +  (:generator 2
    
    1750
    +    (let ((real-tn (complex-double-double-reg-real-hi-tn y)))
    
    1751
    +      (inst movsd real-tn (ea-for-cddf-real-hi-desc x)))
    
    1752
    +    (let ((real-tn (complex-double-double-reg-real-lo-tn y)))
    
    1753
    +      (inst movsd real-tn (ea-for-cddf-real-lo-desc x)))
    
    1754
    +    (let ((imag-tn (complex-double-double-reg-imag-hi-tn y)))
    
    1755
    +      (inst movsd imag-tn (ea-for-cddf-imag-hi-desc x)))
    
    1756
    +    (let ((imag-tn (complex-double-double-reg-imag-lo-tn y)))
    
    1757
    +      (inst movsd imag-tn (ea-for-cddf-imag-lo-desc x)))))
    
    1758
    +
    
    1759
    +(define-move-vop move-to-complex-double-double :move
    
    1760
    +  (descriptor-reg) (complex-double-double-reg))
    
    1761
    +
    
    1762
    +
    
    1763
    +(define-vop (make/double-double-float)
    
    1764
    +  (:args (hi :scs (double-reg) :target r
    
    1765
    +	     :load-if (not (location= hi r)))
    
    1766
    +	 (lo :scs (double-reg) :to :save))
    
    1767
    +  (:results (r :scs (double-double-reg) :from (:argument 0)
    
    1768
    +	       :load-if (not (sc-is r double-double-stack))))
    
    1769
    +  (:arg-types double-float double-float)
    
    1770
    +  (:result-types double-double-float)
    
    1771
    +  (:translate kernel::%make-double-double-float)
    
    1772
    +  (:note _N"inline double-double-float creation")
    
    1773
    +  (:policy :fast-safe)
    
    1774
    +  (:vop-var vop)
    
    1775
    +  (:generator 5
    
    1776
    +    (sc-case r
    
    1777
    +      (double-double-reg
    
    1778
    +       (let ((r-real (double-double-reg-hi-tn r)))
    
    1779
    +	 (unless (location= hi r-real)
    
    1780
    +	   (inst movsd r-real hi)))
    
    1781
    +       (let ((r-imag (double-double-reg-lo-tn r)))
    
    1782
    +	 (unless (location= lo r-imag)
    
    1783
    +	   (inst movsd r-imag lo))))
    
    1784
    +      (double-double-stack
    
    1785
    +       (unless (location= hi r)
    
    1786
    +	 (inst movsd (ea-for-cdf-real-stack r) hi))
    
    1787
    +       (inst movsd (ea-for-cdf-imag-stack r) lo)))))
    
    1788
    +
    
    1789
    +(define-vop (double-double-value)
    
    1790
    +  (:args (x :target r))
    
    1791
    +  (:results (r))
    
    1792
    +  (:variant-vars offset)
    
    1793
    +  (:policy :fast-safe)
    
    1794
    +  (:generator 3
    
    1795
    +    (cond ((sc-is x double-double-reg)
    
    1796
    +	   (let ((value-tn
    
    1797
    +		  (make-random-tn :kind :normal
    
    1798
    +				  :sc (sc-or-lose 'double-reg *backend*)
    
    1799
    +				  :offset (+ offset (tn-offset x)))))
    
    1800
    +	     (unless (location= value-tn r)
    
    1801
    +	       (inst movsd r value-tn))))
    
    1802
    +	  ((sc-is r double-reg)
    
    1803
    +	   (let ((ea (sc-case x
    
    1804
    +		       (double-double-stack
    
    1805
    +			(ecase offset
    
    1806
    +			  (0 (ea-for-cdf-real-stack x))
    
    1807
    +			  (1 (ea-for-cdf-imag-stack x))))
    
    1808
    +		       (descriptor-reg
    
    1809
    +			(ecase offset
    
    1810
    +			  (0 (ea-for-cdf-real-desc x))
    
    1811
    +			  (1 (ea-for-cdf-imag-desc x)))))))
    
    1812
    +	     (inst movsd r ea)))
    
    1813
    +	  (t (error "double-double-value VOP failure")))))
    
    1814
    +
    
    1815
    +
    
    1816
    +(define-vop (hi/double-double-value double-double-value)
    
    1817
    +  (:translate kernel::double-double-hi)
    
    1818
    +  (:args (x :scs (double-double-reg double-double-stack descriptor-reg)
    
    1819
    +	    :target r))
    
    1820
    +  (:arg-types double-double-float)
    
    1821
    +  (:results (r :scs (double-reg)))
    
    1822
    +  (:result-types double-float)
    
    1823
    +  (:note _N"double-double high part")
    
    1824
    +  (:variant 0))
    
    1825
    +
    
    1826
    +(define-vop (lo/double-double-value double-double-value)
    
    1827
    +  (:translate kernel::double-double-lo)
    
    1828
    +  (:args (x :scs (double-double-reg double-double-stack descriptor-reg)
    
    1829
    +	    :target r))
    
    1830
    +  (:arg-types double-double-float)
    
    1831
    +  (:results (r :scs (double-reg)))
    
    1832
    +  (:result-types double-float)
    
    1833
    +  (:note _N"double-double low part")
    
    1834
    +  (:variant 1))
    
    1835
    +
    
    1836
    +(define-vop (make-complex-double-double-float)
    
    1837
    +  (:translate complex)
    
    1838
    +  (:args (real :scs (double-double-reg) :target r
    
    1839
    +	       :load-if (not (location= real r))
    
    1840
    +	       )
    
    1841
    +	 (imag :scs (double-double-reg) :to :save))
    
    1842
    +  (:arg-types double-double-float double-double-float)
    
    1843
    +  (:results (r :scs (complex-double-double-reg) :from (:argument 0)
    
    1844
    +	       :load-if (not (sc-is r complex-double-double-stack))))
    
    1845
    +  (:result-types complex-double-double-float)
    
    1846
    +  (:note _N"inline complex double-double-float creation")
    
    1847
    +  (:policy :fast-safe)
    
    1848
    +  (:generator 5
    
    1849
    +    (sc-case r
    
    1850
    +      (complex-double-double-reg
    
    1851
    +       (let ((r-real (complex-double-double-reg-real-hi-tn r))
    
    1852
    +	     (a-real (double-double-reg-hi-tn real)))
    
    1853
    +	 (unless (location= a-real r-real)
    
    1854
    +	   (inst movsd r-real a-real)))
    
    1855
    +       (let ((r-real (complex-double-double-reg-real-lo-tn r))
    
    1856
    +	     (a-real (double-double-reg-lo-tn real)))
    
    1857
    +	 (unless (location= a-real r-real)
    
    1858
    +	   (inst movsd r-real a-real)))
    
    1859
    +       (let ((r-imag (complex-double-double-reg-imag-hi-tn r))
    
    1860
    +	     (a-imag (double-double-reg-hi-tn imag)))
    
    1861
    +	 (unless (location= a-imag r-imag)
    
    1862
    +	   (inst movsd r-imag a-imag)))
    
    1863
    +       (let ((r-imag (complex-double-double-reg-imag-lo-tn r))
    
    1864
    +	     (a-imag (double-double-reg-lo-tn imag)))
    
    1865
    +	 (unless (location= a-imag r-imag)
    
    1866
    +	   (inst movsd r-imag a-imag))))
    
    1867
    +      (complex-double-double-stack
    
    1868
    +       (unless (location= real r)
    
    1869
    +	 (inst movsd (ea-for-cddf-real-hi-stack r) real))
    
    1870
    +       (let ((real-lo (double-double-reg-lo-tn real)))
    
    1871
    +	 (inst movsd (ea-for-cddf-real-lo-stack r) real-lo))
    
    1872
    +       (let ((imag-val (double-double-reg-hi-tn imag)))
    
    1873
    +	 (inst movsd (ea-for-cddf-imag-hi-stack r) imag-val))
    
    1874
    +       (let ((imag-val (double-double-reg-lo-tn imag)))
    
    1875
    +	 (inst movsd (ea-for-cddf-imag-lo-stack r) imag-val))))))
    
    1876
    +
    
    1877
    +(define-vop (complex-double-double-float-value)
    
    1878
    +  (:args (x :scs (complex-double-double-reg descriptor-reg) :target r
    
    1879
    +	    :load-if (not (sc-is x complex-double-double-stack))))
    
    1880
    +  (:arg-types complex-double-double-float)
    
    1881
    +  (:results (r :scs (double-double-reg)))
    
    1882
    +  (:result-types double-double-float)
    
    1883
    +  (:variant-vars slot)
    
    1884
    +  (:policy :fast-safe)
    
    1885
    +  (:generator 3
    
    1886
    +    (sc-case x
    
    1887
    +      (complex-double-double-reg
    
    1888
    +       (let ((value-tn (ecase slot
    
    1889
    +			 (:real (complex-double-double-reg-real-hi-tn x))
    
    1890
    +			 (:imag (complex-double-double-reg-imag-hi-tn x))))
    
    1891
    +	     (r-hi (double-double-reg-hi-tn r)))
    
    1892
    +	 (unless (location= value-tn r-hi)
    
    1893
    +	   (inst movsd r-hi value-tn)))
    
    1894
    +       (let ((value-tn (ecase slot
    
    1895
    +			 (:real (complex-double-double-reg-real-lo-tn x))
    
    1896
    +			 (:imag (complex-double-double-reg-imag-lo-tn x))))
    
    1897
    +	     (r-lo (double-double-reg-lo-tn r)))
    
    1898
    +	 (unless (location= value-tn r-lo)
    
    1899
    +	   (inst movsd r-lo value-tn))))
    
    1900
    +      (complex-double-double-stack
    
    1901
    +       (let ((r-hi (double-double-reg-hi-tn r)))
    
    1902
    +	 (inst movsd r-hi (ecase slot
    
    1903
    +			    (:real (ea-for-cddf-real-hi-stack x))
    
    1904
    +			    (:imag (ea-for-cddf-imag-hi-stack x)))))
    
    1905
    +       (let ((r-lo (double-double-reg-lo-tn r)))
    
    1906
    +	 (inst movsd r-lo (ecase slot
    
    1907
    +			    (:real (ea-for-cddf-real-lo-stack x))
    
    1908
    +			    (:imag (ea-for-cddf-imag-lo-stack x))))))
    
    1909
    +      (descriptor-reg
    
    1910
    +       (let ((r-hi (double-double-reg-hi-tn r)))
    
    1911
    +	 (inst movsd r-hi (ecase slot
    
    1912
    +			    (:real (ea-for-cddf-real-hi-desc x))
    
    1913
    +			    (:imag (ea-for-cddf-imag-hi-desc x)))))
    
    1914
    +       (let ((r-lo (double-double-reg-lo-tn r)))
    
    1915
    +	 (inst movsd r-lo (ecase slot
    
    1916
    +			    (:real (ea-for-cddf-real-lo-desc x))
    
    1917
    +			    (:imag (ea-for-cddf-imag-lo-desc x)))))))))
    
    1918
    +
    
    1919
    +(define-vop (realpart/complex-double-double-float complex-double-double-float-value)
    
    1920
    +  (:translate realpart)
    
    1921
    +  (:note _N"complex float realpart")
    
    1922
    +  (:variant :real))
    
    1923
    +
    
    1924
    +(define-vop (imagpart/complex-double-double-float complex-double-double-float-value)
    
    1925
    +  (:translate imagpart)
    
    1926
    +  (:note _N"complex float imagpart")
    
    1927
    +  (:variant :imag))
    
    1928
    +
    
    1929
    +); progn
    
    1930
    +
    
    1931
    +
    
    1932
    +;;; Vops for complex arithmetic.  These are usually much faster than
    
    1933
    +;;; the compiler-generated code using deftransforms.
    
    1934
    +
    
    1935
    +;; Negate a complex
    
    1936
    +(macrolet
    
    1937
    +    ((negate-complex (type shift xor amount)
    
    1938
    +       (let ((name (symbolicate "%NEGATE/COMPLEX-" type "-FLOAT"))
    
    1939
    +	     (sc-type (symbolicate "COMPLEX-" type "-FLOAT"))
    
    1940
    +	     (sc (symbolicate "COMPLEX-" type "-REG")))
    
    1941
    +	 `(define-vop (,name)
    
    1942
    +	    (:translate %negate)
    
    1943
    +	    (:args (x :scs (,sc) :target r))
    
    1944
    +	    (:arg-types ,sc-type)
    
    1945
    +	    (:results (r :scs (,sc)))
    
    1946
    +	    (:result-types ,sc-type)
    
    1947
    +	    (:policy :fast-safe)
    
    1948
    +	    (:temporary (:scs (,sc)) t0)
    
    1949
    +	    (:generator 1
    
    1950
    +	      (inst pcmpeqd t0 t0)	; all ones
    
    1951
    +	      (inst ,shift t0 ,amount)	; #x8000...0000
    
    1952
    +	      (unless (location= x r)
    
    1953
    +		(inst movaps r x))
    
    1954
    +	      (inst ,xor r t0))))))
    
    1955
    +  (negate-complex single pslld xorps 31)
    
    1956
    +  (negate-complex double psllq xorpd 63))
    
    1957
    +
    
    1958
    +;; Convert various number types to complex double-floats
    
    1959
    +(macrolet
    
    1960
    +    ((convert-complex (trans op to from)
    
    1961
    +       (let ((name (symbolicate to "/" from))
    
    1962
    +	     (from-sc (symbolicate from "-REG"))
    
    1963
    +	     (from-type (symbolicate from "-FLOAT"))
    
    1964
    +	     (to-sc (symbolicate to "-REG"))
    
    1965
    +	     (to-type (symbolicate to "-FLOAT")))
    
    1966
    +	 `(define-vop (,name)
    
    1967
    +	   (:translate ,trans)
    
    1968
    +	   (:args (x :scs (,from-sc) :target r))
    
    1969
    +	   (:arg-types ,from-type)
    
    1970
    +	   (:results (r :scs (,to-sc)))
    
    1971
    +	   (:result-types ,to-type)
    
    1972
    +	   (:policy :fast-safe)
    
    1973
    +	   (:generator 1
    
    1974
    +	     ;; NOTE: We don't have 128-bit aligned objects, so we
    
    1975
    +	     ;; can't use the stack or descriptors here.
    
    1976
    +	     (inst ,op r x))))))
    
    1977
    +  (convert-complex %complex-double-float cvtps2pd complex-double complex-single)
    
    1978
    +  (convert-complex %complex-single-float cvtpd2ps complex-single complex-double))
    
    1979
    +
    
    1980
    +(macrolet
    
    1981
    +    ((convert-complex (trans op base-ea to from movinst)
    
    1982
    +       (let ((name (symbolicate to "/" from))
    
    1983
    +	     (from-sc (symbolicate from "-REG"))
    
    1984
    +	     (from-sc-stack (symbolicate from "-STACK"))
    
    1985
    +	     (from-type (symbolicate from "-FLOAT"))
    
    1986
    +	     (to-sc (symbolicate to "-REG"))
    
    1987
    +	     (to-type (symbolicate to "-FLOAT")))
    
    1988
    +	 `(define-vop (,name)
    
    1989
    +	   (:translate ,trans)
    
    1990
    +	   (:args (x :scs (,from-sc ,from-sc-stack descriptor-reg)
    
    1991
    +		   :target r))
    
    1992
    +	   (:arg-types ,from-type)
    
    1993
    +	   (:results (r :scs (,to-sc)))
    
    1994
    +	   (:result-types ,to-type)
    
    1995
    +	   (:temporary (:sc ,to-sc) temp)
    
    1996
    +	   (:policy :fast-safe)
    
    1997
    +	   (:generator 1
    
    1998
    +	     (sc-case x
    
    1999
    +	       (,from-sc
    
    2000
    +		;; Need to make sure the imaginary part is zero
    
    2001
    +		(cond ((location= x r)
    
    2002
    +		       (inst xorps temp temp)
    
    2003
    +		       (inst ,op temp x)
    
    2004
    +		       (inst ,movinst r temp))
    
    2005
    +		      (t
    
    2006
    +		       (inst xorps r r)
    
    2007
    +		       (inst ,op r x))))
    
    2008
    +	       (,from-sc-stack
    
    2009
    +		(inst xorps r r)
    
    2010
    +		(inst ,op r (,(symbolicate "EA-FOR-" base-ea "-STACK") x)))
    
    2011
    +	       (descriptor-reg
    
    2012
    +		(inst xorps r r)
    
    2013
    +		(inst ,op r (,(symbolicate "EA-FOR-" base-ea "-DESC") x)))))))))
    
    2014
    +  (convert-complex %complex-double-float cvtss2sd sf complex-double single movapd)
    
    2015
    +  (convert-complex %complex-single-float cvtsd2ss df complex-single double movaps))
    
    2016
    +
    
    2017
    +;; Add and subtract for two complex arguments
    
    2018
    +(macrolet
    
    2019
    +    ((generate (movinst opinst commutative)
    
    2020
    +       `(cond
    
    2021
    +	 ((location= x r)
    
    2022
    +	  (inst ,opinst x y))
    
    2023
    +	 ((and ,commutative (location= y r))
    
    2024
    +	  (inst ,opinst y x))
    
    2025
    +	 ((not (location= r y))
    
    2026
    +	  (inst ,movinst r x)
    
    2027
    +	  (inst ,opinst r y))
    
    2028
    +	 (t
    
    2029
    +	  (inst ,movinst tmp x)
    
    2030
    +	  (inst ,opinst tmp y)
    
    2031
    +	  (inst ,movinst r tmp))))
    
    2032
    +     (complex-add/sub (op inst float-type cost &optional commutative)
    
    2033
    +       (let* ((vop-name (symbolicate (symbol-name op) "/COMPLEX-" float-type "-FLOAT"))
    
    2034
    +	      (c-type (symbolicate "COMPLEX-" float-type "-FLOAT"))
    
    2035
    +	      (complex-reg (symbolicate "COMPLEX-" float-type "-REG")))
    
    2036
    +	 ;; Note: It would probably improve things if we could use
    
    2037
    +	 ;; memory operands, but we can't because the instructions
    
    2038
    +	 ;; assumed 128-bit alignment, which we can't guarantee.
    
    2039
    +	 `(define-vop (,vop-name)
    
    2040
    +	   (:args (x :scs (,complex-reg) :target r)
    
    2041
    +	          (y :scs (,complex-reg)))
    
    2042
    +	   (:results (r :scs (,complex-reg)))
    
    2043
    +	   (:arg-types ,c-type ,c-type)
    
    2044
    +	   (:result-types ,c-type)
    
    2045
    +	   (:policy :fast-safe)
    
    2046
    +	   (:note _N"inline complex float arithmetic")
    
    2047
    +	   (:translate ,op)
    
    2048
    +	   (:temporary (:sc ,complex-reg) tmp)
    
    2049
    +	   (:generator ,cost
    
    2050
    +	     (generate movaps ,inst ,commutative))))))
    
    2051
    +  (complex-add/sub + addps single 1 t)
    
    2052
    +  (complex-add/sub + addpd double 1 t)
    
    2053
    +  (complex-add/sub - subps single 1)
    
    2054
    +  (complex-add/sub - subpd double 1))
    
    2055
    +
    
    2056
    +;; Add and subtract a complex and a float
    
    2057
    +(macrolet
    
    2058
    +    ((generate (movinst opinst)
    
    2059
    +       `(cond
    
    2060
    +	 ((location= x r)
    
    2061
    +	  (inst ,opinst x rtmp))
    
    2062
    +	 ((not (location= r rtmp))
    
    2063
    +	  (inst ,movinst r x)
    
    2064
    +	  (inst ,opinst r rtmp))
    
    2065
    +	 (t
    
    2066
    +	  (inst ,movinst tmp x)
    
    2067
    +	  (inst ,opinst tmp rtmp)
    
    2068
    +	  (inst ,movinst r tmp))))
    
    2069
    +     (complex-op-float (size op fop base-ea cost)
    
    2070
    +       (let ((vop-name (symbolicate "COMPLEX-" size "-FLOAT-"
    
    2071
    +				    op
    
    2072
    +				    "-" size "-FLOAT"))
    
    2073
    +	     (complex-reg (symbolicate "COMPLEX-" size "-REG"))
    
    2074
    +	     (real-reg (symbolicate size "-REG"))
    
    2075
    +	     (c-type (symbolicate "COMPLEX-" size "-FLOAT"))
    
    2076
    +	     (r-type (symbolicate size "-FLOAT"))
    
    2077
    +	     (r-stack (symbolicate size "-STACK"))
    
    2078
    +	     (ea-stack (symbolicate "EA-FOR-" base-ea "-STACK"))
    
    2079
    +	     (ea-desc (symbolicate "EA-FOR-" base-ea "-DESC"))
    
    2080
    +	     (loadinst (ecase size
    
    2081
    +			 (single 'movss)
    
    2082
    +			 (double 'movsd)))
    
    2083
    +	     (movinst (ecase size
    
    2084
    +			(single 'movaps)
    
    2085
    +			(double 'movapd))))
    
    2086
    +	 `(define-vop (,vop-name)
    
    2087
    +	    (:args (x :scs (,complex-reg))
    
    2088
    +	           (y :scs (,real-reg ,r-stack descriptor-reg)))
    
    2089
    +	    (:results (r :scs (,complex-reg)))
    
    2090
    +	    (:arg-types ,c-type ,r-type)
    
    2091
    +	    (:result-types ,c-type)
    
    2092
    +	    (:policy :fast-safe)
    
    2093
    +	    (:note _N"inline complex float/float arithmetic")
    
    2094
    +	    (:translate ,op)
    
    2095
    +	    (:temporary (:sc ,complex-reg) tmp)
    
    2096
    +	    (:temporary (:sc ,real-reg) rtmp)
    
    2097
    +	    (:generator ,cost
    
    2098
    +	      ;; Clear out high and low parts of temp, which will
    
    2099
    +	      ;; eventually hold y.
    
    2100
    +	      (inst xorpd rtmp rtmp)
    
    2101
    +	      (sc-case y
    
    2102
    +		(,real-reg
    
    2103
    +		 (inst ,loadinst rtmp y)
    
    2104
    +		 (generate ,movinst ,fop))
    
    2105
    +		(,r-stack
    
    2106
    +		 (let ((ea (,ea-stack y)))
    
    2107
    +		   (inst ,loadinst rtmp ea)
    
    2108
    +		   (generate ,movinst ,fop)))
    
    2109
    +		(descriptor-reg
    
    2110
    +		 (let ((ea (,ea-desc y)))
    
    2111
    +		   (inst ,loadinst rtmp ea)
    
    2112
    +		   (generate ,movinst ,fop)))))))))
    
    2113
    +  (complex-op-float single + addps sf 1)
    
    2114
    +  (complex-op-float single - subps sf 1)
    
    2115
    +  (complex-op-float double + addpd df 1)
    
    2116
    +  (complex-op-float double - subpd df 1))
    
    2117
    +
    
    2118
    +;; Add a float and a complex
    
    2119
    +(macrolet
    
    2120
    +    ((generate (movinst opinst)
    
    2121
    +       `(cond
    
    2122
    +	 ((location= x r)
    
    2123
    +	  (inst ,opinst x rtmp))
    
    2124
    +	 ((not (location= r y))
    
    2125
    +	  (inst ,movinst r x)
    
    2126
    +	  (inst ,opinst r rtmp))
    
    2127
    +	 (t
    
    2128
    +	  (inst ,movinst tmp x)
    
    2129
    +	  (inst ,opinst tmp rtmp)
    
    2130
    +	  (inst ,movinst r tmp))))
    
    2131
    +     (complex-op-float (size op fop base-ea cost)
    
    2132
    +       (let ((vop-name (symbolicate size "-FLOAT-"
    
    2133
    +				    op
    
    2134
    +				    "-" "COMPLEX-" size "-FLOAT"))
    
    2135
    +	     (complex-reg (symbolicate "COMPLEX-" size "-REG"))
    
    2136
    +	     (real-reg (symbolicate size "-REG"))
    
    2137
    +	     (c-type (symbolicate "COMPLEX-" size "-FLOAT"))
    
    2138
    +	     (r-type (symbolicate size "-FLOAT"))
    
    2139
    +	     (r-stack (symbolicate size "-STACK"))
    
    2140
    +	     (ea-stack (symbolicate "EA-FOR-" base-ea "-STACK"))
    
    2141
    +	     (ea-desc (symbolicate "EA-FOR-" base-ea "-DESC"))
    
    2142
    +	     (loadinst (ecase size
    
    2143
    +			 (single 'movss)
    
    2144
    +			 (double 'movsd)))
    
    2145
    +	     (movinst (ecase size
    
    2146
    +			(single 'movaps)
    
    2147
    +			(double 'movapd))))
    
    2148
    +	 `(define-vop (,vop-name)
    
    2149
    +	    (:args (y :scs (,real-reg ,r-stack descriptor-reg))
    
    2150
    +	           (x :scs (,complex-reg)))
    
    2151
    +	    (:results (r :scs (,complex-reg)))
    
    2152
    +	    (:arg-types ,r-type ,c-type)
    
    2153
    +	    (:result-types ,c-type)
    
    2154
    +	    (:policy :fast-safe)
    
    2155
    +	    (:note _N"inline complex float/float arithmetic")
    
    2156
    +	    (:translate ,op)
    
    2157
    +	    (:temporary (:sc ,complex-reg) tmp)
    
    2158
    +	    (:temporary (:sc ,real-reg) rtmp)
    
    2159
    +	    (:generator ,cost
    
    2160
    +	      (inst xorpd rtmp rtmp)
    
    2161
    +	      (sc-case y
    
    2162
    +		(,real-reg
    
    2163
    +		 (inst ,loadinst rtmp y)
    
    2164
    +		 (generate ,movinst ,fop))
    
    2165
    +		(,r-stack
    
    2166
    +		 (let ((ea (,ea-stack y)))
    
    2167
    +		   (inst ,loadinst rtmp ea)
    
    2168
    +		   (generate ,movinst ,fop)))
    
    2169
    +		(descriptor-reg
    
    2170
    +		 (let ((ea (,ea-desc y)))
    
    2171
    +		   (inst ,loadinst rtmp ea)
    
    2172
    +		   (generate ,movinst ,fop)))))))))
    
    2173
    +  (complex-op-float single + addps sf 1)
    
    2174
    +  (complex-op-float double + addpd df 1))
    
    2175
    +
    
    2176
    +;; Multiply a complex by a float or a float by a complex.
    
    2177
    +(macrolet
    
    2178
    +    ((complex-*-float (float-type fmul copy cost)
    
    2179
    +       (let* ((vop-name (symbolicate "COMPLEX-"
    
    2180
    +				     float-type
    
    2181
    +				     "-FLOAT-*-"
    
    2182
    +				     float-type
    
    2183
    +				     "-FLOAT"))
    
    2184
    +	      (vop-name-r (symbolicate float-type
    
    2185
    +				       "-FLOAT-*-COMPLEX-"
    
    2186
    +				       float-type
    
    2187
    +				       "-FLOAT"))
    
    2188
    +	      (complex-sc-type (symbolicate "COMPLEX-" float-type "-REG"))
    
    2189
    +	      (real-sc-type (symbolicate float-type "-REG"))
    
    2190
    +	      (c-type (symbolicate "COMPLEX-" float-type "-FLOAT"))
    
    2191
    +	      (r-type (symbolicate float-type "-FLOAT")))
    
    2192
    +	 `(progn
    
    2193
    +	   ;; Complex * float
    
    2194
    +	   (define-vop (,vop-name)
    
    2195
    +	     (:args (x :scs (,complex-sc-type))
    
    2196
    +	            (y :scs (,real-sc-type)))
    
    2197
    +	     (:results (r :scs (,complex-sc-type)))
    
    2198
    +	     (:arg-types ,c-type ,r-type)
    
    2199
    +	     (:result-types ,c-type)
    
    2200
    +	     (:policy :fast-safe)
    
    2201
    +	     (:note _N"inline complex float arithmetic")
    
    2202
    +	     (:translate *)
    
    2203
    +	     (:temporary (:scs (,complex-sc-type)) t0)
    
    2204
    +	     (:generator ,cost
    
    2205
    +	       (inst movaps t0 y)	; t0 = y
    
    2206
    +	       (inst ,copy t0 t0)	; t0 = y|y
    
    2207
    +	       (unless (location= x r)
    
    2208
    +		 (inst movaps r x))	; r = xi|xr
    
    2209
    +	       (inst ,fmul r t0)))
    
    2210
    +	   (define-vop (,vop-name-r)
    
    2211
    +	     (:args (x :scs (,real-sc-type))
    
    2212
    +	            (y :scs (,complex-sc-type)))
    
    2213
    +	     (:results (r :scs (,complex-sc-type)))
    
    2214
    +	     (:arg-types ,r-type ,c-type)
    
    2215
    +	     (:result-types ,c-type)
    
    2216
    +	     (:policy :fast-safe)
    
    2217
    +	     (:note _N"inline complex float arithmetic")
    
    2218
    +	     (:translate *)
    
    2219
    +	     (:temporary (:scs (,complex-sc-type)) t0)
    
    2220
    +	     (:generator ,cost
    
    2221
    +	       (inst movaps t0 x)	; t0 = 0|x or 0|0|0|x
    
    2222
    +	       (inst ,copy t0 t0)	; t0 = x|x or 0|0|x|x
    
    2223
    +	       (unless (location= y r)
    
    2224
    +		 (inst movaps r y))	; r = yi|yr or 0|0|yi|yr
    
    2225
    +	       (inst ,fmul r t0)))))))
    
    2226
    +  (complex-*-float single mulps unpcklps 4)
    
    2227
    +  (complex-*-float double mulpd unpcklpd 4))
    
    2228
    +
    
    2229
    +;; Divide a complex by a real
    
    2230
    +(define-vop (complex-double-float-/-double-float)
    
    2231
    +  (:args (x :scs (complex-double-reg)) (y :scs (double-reg)))
    
    2232
    +  (:results (r :scs (complex-double-reg)))
    
    2233
    +  (:arg-types complex-double-float double-float)
    
    2234
    +  (:result-types complex-double-float)
    
    2235
    +  (:policy :fast-safe)
    
    2236
    +  (:note _N"inline complex float arithmetic")
    
    2237
    +  (:translate /)
    
    2238
    +  (:temporary (:sc complex-double-reg) t0)
    
    2239
    +  (:generator 4
    
    2240
    +    (inst movaps t0 y)			; t0 = u|y
    
    2241
    +    (inst unpcklpd t0 t0)		; t0 = y|y
    
    2242
    +    (unless (location= x r)
    
    2243
    +      (inst movaps r x))		; r = xi|xr
    
    2244
    +    (inst divpd r t0)))
    
    2245
    +
    
    2246
    +(define-vop (complex-single-float-/-single-float)
    
    2247
    +  (:args (x :scs (complex-single-reg)) (y :scs (single-reg)))
    
    2248
    +  (:results (r :scs (complex-single-reg)))
    
    2249
    +  (:arg-types complex-single-float single-float)
    
    2250
    +  (:result-types complex-single-float)
    
    2251
    +  (:policy :fast-safe)
    
    2252
    +  (:note _N"inline complex float arithmetic")
    
    2253
    +  (:translate /)
    
    2254
    +  (:temporary (:sc complex-single-reg) t0 t1)
    
    2255
    +  (:generator 5
    
    2256
    +    ;; The upper parts of x may contain junk and dividing that by y
    
    2257
    +    ;; may cause spurious signals.  Thus, copy the complex number to
    
    2258
    +    ;; the high part.
    
    2259
    +    (inst movaps t0 y)			; t0 = u|u|u|y
    
    2260
    +    (inst shufps t0 t0 0)		; t0 = y|y|y|y
    
    2261
    +    (inst movaps t1 x)			; t1 = u|u|xi|xr
    
    2262
    +    (inst movlhps t1 t1)		; t1 = xi|xr|xi|xr
    
    2263
    +    (inst divps t1 t0)
    
    2264
    +    (inst xorps t0 t0)			; t0 = 0|0|0|0
    
    2265
    +    (inst movaps r t1)
    
    2266
    +    (inst movlhps r t0)))
    
    2267
    +
    
    2268
    +(define-vop (sse3-*/complex-double-float)
    
    2269
    +  (:translate *)
    
    2270
    +  (:args (x :scs (complex-double-reg))
    
    2271
    +	 (y :scs (complex-double-reg)))
    
    2272
    +  (:arg-types complex-double-float complex-double-float)
    
    2273
    +  (:results (r :scs (complex-double-reg)))
    
    2274
    +  (:result-types complex-double-float)
    
    2275
    +  (:policy :fast-safe)
    
    2276
    +  (:temporary (:scs (complex-double-reg)) t1 t2)
    
    2277
    +  (:guard (backend-featurep :sse3))
    
    2278
    +  (:generator 8
    
    2279
    +    ;; Basic algorithm from the paper "The Microarchitecture of the
    
    2280
    +    ;; Intel Pentium 4 Processor on 90nm Technololgy"
    
    2281
    +    ;;
    
    2282
    +    ;; This requires SSSE3 instructions (addsubpd, movddup).
    
    2283
    +    ;;
    
    2284
    +    ;; x = a + b*i.  In sse2 reg we have: b|a
    
    2285
    +    ;; y = c + d*i.  In sse2 reg we have: d|c
    
    2286
    +    (inst movddup t1 x)			; t1 = a|a
    
    2287
    +    (inst mulpd t1 y)			; t1 = a*d|a*c
    
    2288
    +    (inst movapd t2 x)			; t2 = b|a
    
    2289
    +    (inst unpckhpd t2 t2)		; t2 = b|b
    
    2290
    +    (inst mulpd t2 y)			; t2 = b*d|b*c
    
    2291
    +    (inst shufpd t2 t2 1)		; t2 = b*c|b*d
    
    2292
    +    (inst addsubpd t1 t2)		; t2 = a*d+b*c|a*c-b*d
    
    2293
    +    (inst movapd r t1)))
    
    2294
    +
    
    2295
    +(define-vop (*/complex-double-float)
    
    2296
    +  (:translate *)
    
    2297
    +  (:args (x :scs (complex-double-reg))
    
    2298
    +	 (y :scs (complex-double-reg)))
    
    2299
    +  (:arg-types complex-double-float complex-double-float)
    
    2300
    +  (:results (r :scs (complex-double-reg)))
    
    2301
    +  (:result-types complex-double-float)
    
    2302
    +  (:policy :fast-safe)
    
    2303
    +  (:temporary (:scs (complex-double-reg)) t0 t1 t2)
    
    2304
    +  (:temporary (:scs (unsigned-reg)) tmp)
    
    2305
    +  (:generator 13
    
    2306
    +    ;; Basic algorithm from the paper "The Microarchitecture of the
    
    2307
    +    ;; Intel Pentium 4 Processor on 90nm Technololgy"
    
    2308
    +
    
    2309
    +    ;; x = a+b*i = b|a
    
    2310
    +    ;; y = c+d*i = d|c
    
    2311
    +    ;; r = a*c-b*d + i*(a*d+b*c)
    
    2312
    +    (inst movapd t1 y)			; t1 = d|c
    
    2313
    +    (inst movapd t2 y)			; t2 = d|c
    
    2314
    +    (inst unpcklpd t1 t1)		; t1 = c|c
    
    2315
    +    (inst unpckhpd t2 t2)		; t2 = d|d
    
    2316
    +    (inst mulpd t1 x)			; t1 = b*c|a*c
    
    2317
    +    (inst mulpd t2 x)			; t2 = b*d|a*d
    
    2318
    +    (inst shufpd t2 t2 1)		; t2 = a*d|b*d
    
    2319
    +    (inst mov tmp #x80000000)
    
    2320
    +    (inst movd t0 tmp)			; t0 = 0|0|0|#x80000000
    
    2321
    +    (inst psllq t0 32)			; t0 = 0|#x80000000,00000000
    
    2322
    +    (inst xorpd t2 t0)			; t2 = a*d|-b*d
    
    2323
    +    (inst addpd t2 t1)			; t2 = a*d+b*c | a*c-b*d
    
    2324
    +    (inst movapd r t2)))
    
    2325
    +
    
    2326
    +
    
    2327
    +(define-vop (*/complex-single-float)
    
    2328
    +  (:translate *)
    
    2329
    +  (:args (x :scs (complex-single-reg))
    
    2330
    +	 (y :scs (complex-single-reg)))
    
    2331
    +  (:arg-types complex-single-float complex-single-float)
    
    2332
    +  (:results (r :scs (complex-single-reg)))
    
    2333
    +  (:result-types complex-single-float)
    
    2334
    +  (:policy :fast-safe)
    
    2335
    +  (:temporary (:scs (complex-single-reg)) t0 t1 t2)
    
    2336
    +  (:temporary (:scs (unsigned-reg)) tmp)
    
    2337
    +  (:generator 14
    
    2338
    +    ;; Basic algorithm from the paper "The Microarchitecture of the
    
    2339
    +    ;; Intel Pentium 4 Processor on 90nm Technololgy"
    
    2340
    +
    
    2341
    +    ;; x = a+b*i = b|a
    
    2342
    +    ;; y = c+d*i = d|c
    
    2343
    +    ;; r = a*c-b*d + i*(a*d+b*c)
    
    2344
    +    (inst movaps t1 y)			; t1 = u|u|d|c
    
    2345
    +    (inst movaps t2 y)			; t2 = u|u|d|c
    
    2346
    +    (inst shufps t1 t1 #b00000000)	; t1 = c|c|c|c
    
    2347
    +    (inst shufps t2 t2 #b01010101)	; t2 = d|d|d|d
    
    2348
    +    (inst mulps t1 x)			; t1 = b*c|a*c
    
    2349
    +    (inst mulps t2 x)			; t2 = b*d|a*d
    
    2350
    +    (inst shufps t2 t2 1)		; t2 = a*d|b*d
    
    2351
    +    (inst mov tmp #x80000000)
    
    2352
    +    (inst movd t0 tmp)			; t0 = 0|0|0|#x80000000
    
    2353
    +    (inst xorps t2 t0)			; t2 = a*d|-b*d
    
    2354
    +    (inst addps t2 t1)			; t2 = a*d+b*c | a*c-b*d
    
    2355
    +    (inst xorps t1 t1)			; t1 = 0|0|0|0
    
    2356
    +    (inst movaps r t2)
    
    2357
    +    (inst movlhps r t1)))
    
    2358
    +
    
    2359
    +;; Conjugate
    
    2360
    +(define-vop (conjugate/complex-double-float)
    
    2361
    +  (:translate conjugate)
    
    2362
    +  (:args (z :scs (complex-double-reg)))
    
    2363
    +  (:arg-types complex-double-float)
    
    2364
    +  (:results (r :scs (complex-double-reg)))
    
    2365
    +  (:result-types complex-double-float)
    
    2366
    +  (:policy :fast-safe)
    
    2367
    +  (:temporary (:scs (complex-double-reg)) ztmp)
    
    2368
    +  (:temporary (:scs (unsigned-reg)) tmp)
    
    2369
    +  (:generator 2
    
    2370
    +    (inst mov tmp #x80000000)
    
    2371
    +    (inst movd ztmp tmp)
    
    2372
    +    (inst psllq ztmp 32)		; ztmp = 0|#x80000000,00000000
    
    2373
    +    (inst shufpd ztmp ztmp 1)		; ztmp = #x80000000,00000000|0
    
    2374
    +    (inst xorpd ztmp z)			; ztmp = -xi|xi
    
    2375
    +    (inst movapd r ztmp)))
    
    2376
    +
    
    2377
    +(define-vop (conjugate/complex-single-float)
    
    2378
    +  (:translate conjugate)
    
    2379
    +  (:args (z :scs (complex-single-reg)))
    
    2380
    +  (:arg-types complex-single-float)
    
    2381
    +  (:results (r :scs (complex-single-reg)))
    
    2382
    +  (:result-types complex-single-float)
    
    2383
    +  (:policy :fast-safe)
    
    2384
    +  (:temporary (:scs (complex-single-reg)) ztmp)
    
    2385
    +  (:temporary (:scs (unsigned-reg)) tmp)
    
    2386
    +  (:generator 2
    
    2387
    +    (inst mov tmp #x80000000)
    
    2388
    +    (inst movd ztmp tmp)
    
    2389
    +    (inst psllq ztmp 32)		; ztmp = #x80000000|0
    
    2390
    +    (inst xorps ztmp z)			; ztmp = -xi|xr
    
    2391
    +    (inst movaps r ztmp)))

  • src/compiler/amd64/macros.lisp
    ... ... @@ -130,10 +130,10 @@
    130 130
     	      (n-offset offset))
    
    131 131
         (ecase (backend-byte-order *target-backend*)
    
    132 132
           (:little-endian
    
    133
    -       `(inst mov ,n-target
    
    133
    +       `(inst movzx ,n-target
    
    134 134
     	      (make-ea :byte :base ,n-source :disp ,n-offset)))
    
    135 135
           (:big-endian
    
    136
    -       `(inst mov ,n-target
    
    136
    +       `(inst movzx ,n-target
    
    137 137
     	      (make-ea :byte :base ,n-source :disp (+ ,n-offset 3)))))))
    
    138 138
     
    
    139 139
     (defmacro load-foreign-data-symbol (reg name )
    

  • src/compiler/amd64/vm.lisp
    ... ... @@ -268,8 +268,7 @@
    268 268
     
    
    269 269
       ;; Non-Descriptor characters
    
    270 270
       (base-char-reg registers
    
    271
    -		 :locations #.byte-regs
    
    272
    -		 :reserve-locations (#.ah-offset #.al-offset)
    
    271
    +		 :locations #.dword-regs
    
    273 272
     		 :constant-scs (immediate)
    
    274 273
     		 :save-p t
    
    275 274
     		 :alternate-scs (base-char-stack))
    
    ... ... @@ -385,12 +384,13 @@
    385 384
     
    
    386 385
     (eval-when (compile load eval)
    
    387 386
     
    
    388
    -(defconstant byte-sc-names '(base-char-reg byte-reg base-char-stack))
    
    387
    +(defconstant byte-sc-names '(byte-reg))
    
    389 388
     (defconstant word-sc-names '(word-reg))
    
    390 389
     (defconstant dword-sc-names '(dword-reg))
    
    391 390
     (defconstant qword-sc-names
    
    392 391
       '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack
    
    393
    -    signed-stack unsigned-stack sap-stack single-stack constant))
    
    392
    +    signed-stack unsigned-stack sap-stack single-stack constant
    
    393
    +    base-char-reg base-char-stack))
    
    394 394
     
    
    395 395
     ;;;
    
    396 396
     ;;; added by jrd.  I guess the right thing to do is to treat floats
    

  • src/compiler/generic/objdef.lisp
    ... ... @@ -123,13 +123,13 @@
    123 123
       single-float
    
    124 124
       double-float
    
    125 125
       #+long-float long-float
    
    126
    -  #+#.(c:target-featurep :double-double)
    
    126
    +  #+double-double
    
    127 127
       double-double-float
    
    128 128
       complex
    
    129 129
       complex-single-float
    
    130 130
       complex-double-float
    
    131 131
       #+long-float complex-long-float
    
    132
    -  #+#.(c:target-featurep :double-double)
    
    132
    +  #+double-double
    
    133 133
       complex-double-double-float
    
    134 134
       
    
    135 135
       simple-array
    
    ... ... @@ -148,12 +148,12 @@
    148 148
       simple-array-single-float
    
    149 149
       simple-array-double-float
    
    150 150
       #+long-float simple-array-long-float
    
    151
    -  #+#.(c:target-featurep :double-double)
    
    151
    +  #+double-double
    
    152 152
       simple-array-double-double-float
    
    153 153
       simple-array-complex-single-float
    
    154 154
       simple-array-complex-double-float
    
    155 155
       #+long-float simple-array-complex-long-float
    
    156
    -  #+#.(c:target-featurep :double-double)
    
    156
    +  #+double-double
    
    157 157
       simple-array-complex-double-double-float
    
    158 158
       complex-string
    
    159 159
       complex-bit-vector
    

  • src/tools/cross-scripts/cross-x86-amd64.lisp
    ... ... @@ -206,6 +206,7 @@
    206 206
     	))
    
    207 207
     
    
    208 208
     (in-package :vm)
    
    209
    +(defvar *num-fixups* 0)
    
    209 210
     (defun fixup-code-object (code offset fixup kind)
    
    210 211
       (declare (type index offset))
    
    211 212
       (flet ((add-fixup (code offset)