Update of /project/sb-simd/cvsroot/sb-simd
In directory common-lisp.net:/tmp/cvs-serv31523

Modified Files:
  sse-vop.lisp generate-sse-instructions.lisp
Log Message:
..

Date: Mon Aug 8 15:33:29 2005
Author: rlaakso
Index: sb-simd/sse-vop.lisp
diff -u sb-simd/sse-vop.lisp:1.1.1.1 sb-simd/sse-vop.lisp:1.2
--- sb-simd/sse-vop.lisp:1.1.1.1 Fri Aug 5 15:13:29 2005
+++ sb-simd/sse-vop.lisp Mon Aug 8 15:33:29 2005
@@ -4,204 +4,113 @@
 `(make-ea :dword :base ,vect :index ,idx :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)))
+;; TWO-ARG SSE VOPs +(loop for (op-name type mov-inst op-inst) in + '( + (add single-float movups addps) + (addsub single-float movups addsubps) + (andnot single-float movups andnps) + (and single-float movups andps) + (div single-float movups divps) + (hadd single-float movups haddps) + (hsub single-float movups hsubps) + (max single-float movups maxps) + (min single-float movups minps) + (mul single-float movups mulps) + (or single-float movups orps) + (sub single-float movups subps) + (xor single-float movups xorps) + + (add double-float movupd addpd) + (addsub double-float movupd addsubpd) + (andnot double-float movupd andnpd) + (and double-float movupd andpd) + (div double-float movupd divpd) + (hadd double-float movupd haddpd) + (hsub double-float movupd hsubpd) + (max double-float movupd maxpd) + (min double-float movupd minpd) + (mul double-float movupd mulpd) + (or double-float movupd orpd) + (sub double-float movupd subpd) + (xor double-float movupd xorpd) + ) + do + + `(define-vop (,(intern (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type))) + (:policy :fast-safe)
-(define-vop (%sse-add/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (vect2 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - (inst movups sse-temp2 (vect-ea vect2 index)) - - ;; operate - (inst addps sse-temp1 sse-temp2) - - ;; store - (inst movups (vect-ea result index) sse-temp1) - )) - -(define-vop (%sse-sub/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (vect2 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - (inst movups sse-temp2 (vect-ea vect2 index)) - - ;; operate - (inst subps sse-temp1 sse-temp2) - - ;; store - (inst movups (vect-ea result index) sse-temp1) - )) - -(define-vop (%sse-mul/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (vect2 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + ;;(:guard (member :sse2 *backend-subfeatures*))
- (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - (inst movups sse-temp2 (vect-ea vect2 index)) - - ;; operate - (inst mulps sse-temp1 sse-temp2) - - ;; store - (inst movups (vect-ea result index) sse-temp1) - )) - -(define-vop (%sse-div/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) + (:args + (result :scs (descriptor-reg)) (vect1 :scs (descriptor-reg)) (vect2 :scs (descriptor-reg)) (index :scs (unsigned-reg)))
- (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - (inst movups sse-temp2 (vect-ea vect2 index)) - - ;; operate - (inst divps sse-temp1 sse-temp2) - - ;; store - (inst movups (vect-ea result index) sse-temp1) - )) - -(define-vop (%sse-sqrt/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) + (:arg-types + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + fixnum)
- (:arg-types simple-array-single-float simple-array-single-float fixnum) + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2)
- (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + (:generator 10
- (:generator 10 + ;; scale index by 4 (size-of single-float) + (inst shl index 2)
- ;; scale index by 4 (size-of single-float) - (inst shl index 2) + ;; load + (inst ,mov-inst sse-temp1 (vect-ea vect1 index)) + (inst ,mov-inst sse-temp2 (vect-ea vect2 index))
- ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) + ;; operate + (inst ,op-inst sse-temp1 sse-temp2)
- ;; operate - (inst sqrtps sse-temp2 sse-temp1) + ;; store + (inst ,mov-inst (vect-ea result index) sse-temp1) + )))
- ;; store - (inst movups (vect-ea result index) sse-temp2) - )) +;; SINGLE-ARG SSE VOPs +(loop for (op-name type mov-inst op-inst) in + '( + (recip single-float movups rcpps) + (rsqrt single-float movups rsqrtps) + (sqrt single-float movups sqrtps) + (sqrt double-float movupd sqrtpd) + ) + do + + `(define-vop (,(intern (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type))) + (:policy :fast-safe)
+ ;;(:guard (member :sse2 *backend-subfeatures*))
-(define-vop (%sse-recip/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) + (:args + (result :scs (descriptor-reg)) (vect1 :scs (descriptor-reg)) (index :scs (unsigned-reg)))
- (:arg-types simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - - ;; operate - (inst rcpps sse-temp2 sse-temp1) - - ;; store - (inst movups (vect-ea result index) sse-temp2) - )) - - -(define-vop (%sse-recip-sqrt/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 + (:arg-types + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + fixnum)
- ;; scale index by 4 (size-of single-float) - (inst shl index 2) + (:temporary (:sc sse-reg) sse-temp1)
- ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) + (:generator 10
- ;; operate - (inst rsqrtps sse-temp2 sse-temp1) + ;; scale index by 4 (size-of single-float) + (inst shl index 2)
- ;; store - (inst movups (vect-ea result index) sse-temp2) - )) + ;; load + (inst ,mov-inst sse-temp1 (vect-ea vect1 index))
+ ;; operate + (inst ,op-inst sse-temp1)
+ ;; store + (inst ,mov-inst (vect-ea result index) sse-temp1) + )))
Index: sb-simd/generate-sse-instructions.lisp
diff -u sb-simd/generate-sse-instructions.lisp:1.3 sb-simd/generate-sse-instructions.lisp:1.4
--- sb-simd/generate-sse-instructions.lisp:1.3 Mon Aug 8 12:59:52 2005
+++ sb-simd/generate-sse-instructions.lisp Mon Aug 8 15:33:29 2005
@@ -7,49 +7,16 @@
TODO:
-CMPPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 -CMPPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34 -CMPSD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37 -CMPSS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 - FXRSTOR. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 121 FXSAVE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 124
-HADDPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 126 -HADDPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 129 -HSUBPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 132 -HSUBPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 135 - -LDDQU. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 138 LDMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 140
-MASKMOVDQU . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 142 - -MOVD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 173 -MOVDDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 176 MOVDQ2Q . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 178
-MOVHLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 184 - -MOVLHPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 190 - -MOVMSKPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 196 -MOVMSKPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 198 -MOVNTDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 200 -MOVNTPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 202 -MOVNTPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 204 - MOVQ2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 208
-MOVSHDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 213 -MOVSLDUP . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 215 - -PEXTRW. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 284 -PINSRW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 286 - -PSHUFD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 314 -PSHUFHW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 317 -PSHUFLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 320 +(ib-forms:) PSLLD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 323 PSLLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 326 PSLLQ. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 328 @@ -61,8 +28,6 @@ PSRLQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 344 PSRLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 347
-SHUFPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 392 -SHUFPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 395 STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 410
@@ -88,6 +53,8 @@ (andnps #x0F #x55) (andps #x0F #x54) (divps #x0F #x5E) + (haddps #xF2 #x0F #x7C) + (hsubps #xF2 #x0F #x7D) (maxps #x0F #x5F) (minps #x0F #x5D) (mulps #x0F #x59) @@ -106,10 +73,12 @@ (andnpd #x66 #x0F #x55) (andpd #x66 #x0F #x54) (divpd #x66 #x0F #x5E) + (haddpd #x66 #x0F #x7C) + (hsubpd #x66 #x0F #x7D) (maxpd #x66 #x0F #x5F) (minpd #x66 #x0F #x5D) (mulpd #x66 #x0F #x59) - (orps #x66 #x0F #x56) + (orpd #x66 #x0F #x56) (sqrtpd #x66 #x0F #x51) (subpd #x66 #x0F #x5C) (unpckhpd #x66 #x0F #x15) @@ -238,7 +207,20 @@ (cvttps2pi #x0F #x2C) (cvttsd2si #xF2 #x0F #x2C) (cvttss2si #xF3 #x0F #x2C) - + + ;; misc + (lddqu #xF2 #x0F #xF0) + (maskmovdqu #x66 #x0F #xF7) + (movddup #xF2 #x0F #x12) + (movhlps #x0F #x12) + (movlhps #x0F #x16) + (movmskpd #x66 #x0F #x50) + (movmskps #x0F #x50) + (movntdq #x66 #x0F #XE7) + (movntpd #x66 #x0F #x2B) + (movntps #x0F #x2B) + (movshdup #xF3 #x0F #x16) + (movsldup #xF3 #x0F #x12) ) do (format stream "~S~%~%" @@ -247,11 +229,63 @@ ,@(emit-ops ops) (emit-ea segment src (reg-tn-encoding dst))))))
+ + ;; INSTRUCTIONS WITH /r IB8 + (loop for (inst . ops) in + '( + (pextrw #X66 #x0F #xC5) + (pinsrw #x66 #x0F #xC4) + + (pshufd #x66 #x0F #x70) + (pshufhw #xF3 #x0F #x70) + (pshuflw #xF2 #x0F #x70) + + (shufpd #x66 #x0F #xC6) + (shufps #x0F #xC6) + + ) + do + (format stream "~S~%~%" + `(define-instruction ,(intern (symbol-name inst)) (segment dst src byte) + (:emitter + ,@(emit-ops ops) + (emit-ea segment src (reg-tn-encoding dst)) + (emit-sized-immediate segment :byte byte) + )))) + + ;; COMPARE + (loop for (inst . ops) in + '( + (cmppd #x66 #x0F #xC2) + (cmpps #x0F #xC2) + (cmpsd #xF2 #x0F #xC2) + (cmpss #xF3 #x0F #xC2) + ) + do + (format stream "~S~%~%" + `(define-instruction ,(intern (symbol-name inst)) (segment dst src cond) + (:emitter + ,@(emit-ops ops) + (emit-ea segment src (reg-tn-encoding dst)) + (emit-sized-immediate segment :byte (cdr (assoc cond + '((:eq . #b000) (:e . #b000) (:z . #b000) + (:l . #b001) (:nge . #b001) + (:le . #b010) (:ng . #b010) + (:unord . #b011) + (:ne . #b100) (:nz . #b100) + (:nl . #b101) (:ge . #b101) + (:nle . #b110) (:g . #b110) + (:ord . #b111) + )))) + )))) + ;; MOVES (loop for (inst ops-m2r ops-r2m) in '( (movapd (#x66 #x0F #x28) (#x66 #x0F #x29)) (movaps (#x0F #x28) (#x0F #x29)) + + (movd (#x66 #x0F #x6E) (#x66 #x0F #x7E))
(movdqa (#x66 #x0F #x6F) (#x66 #x0F #x7F)) (movdqu (#xF3 #x0F #x6F) (#xF3 #x0F #x7F))