[sb-simd-cvs] CVS update: sb-simd/sse-vop.lisp sb-simd/generate-sse-instructions.lisp
Risto Laakso
rlaakso at common-lisp.net
Mon Aug 8 13:33:31 UTC 2005
Update of /project/sb-simd/cvsroot/sb-simd
In directory common-lisp.net:/tmp/cvs-serv31523
Modified Files:
sse-vop.lisp generate-sse-instructions.lisp
Log Message:
..
Date: Mon Aug 8 15:33:29 2005
Author: rlaakso
Index: sb-simd/sse-vop.lisp
diff -u sb-simd/sse-vop.lisp:1.1.1.1 sb-simd/sse-vop.lisp:1.2
--- sb-simd/sse-vop.lisp:1.1.1.1 Fri Aug 5 15:13:29 2005
+++ sb-simd/sse-vop.lisp Mon Aug 8 15:33:29 2005
@@ -4,204 +4,113 @@
`(make-ea :dword :base ,vect :index ,idx
:disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)))
+;; TWO-ARG SSE VOPs: one DEFINE-VOP template per (operation, element type) row; result = vect1 OP vect2 at INDEX
+(loop for (op-name type mov-inst op-inst) in
+ '(
+ (add single-float movups addps)
+ (addsub single-float movups addsubps)
+ (andnot single-float movups andnps)
+ (and single-float movups andps)
+ (div single-float movups divps)
+ (hadd single-float movups haddps)
+ (hsub single-float movups hsubps)
+ (max single-float movups maxps)
+ (min single-float movups minps)
+ (mul single-float movups mulps)
+ (or single-float movups orps)
+ (sub single-float movups subps)
+ (xor single-float movups xorps)
+
+ (add double-float movupd addpd)
+ (addsub double-float movupd addsubpd)
+ (andnot double-float movupd andnpd)
+ (and double-float movupd andpd)
+ (div double-float movupd divpd)
+ (hadd double-float movupd haddpd)
+ (hsub double-float movupd hsubpd)
+ (max double-float movupd maxpd)
+ (min double-float movupd minpd)
+ (mul double-float movupd mulpd)
+ (or double-float movupd orpd)
+ (sub double-float movupd subpd)
+ (xor double-float movupd xorpd)
+ )
+ do
+ ;; NOTE(review): the backquoted form below is only constructed and then discarded by DO; nothing evaluates or prints it here -- confirm how it is meant to be expanded
+ `(define-vop (,(intern (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type)))
+ (:policy :fast-safe)
-(define-vop (%sse-add/simple-array-single-float-1)
- (:policy :fast-safe)
-
- (:args (result :scs (descriptor-reg))
- (vect1 :scs (descriptor-reg))
- (vect2 :scs (descriptor-reg))
- (index :scs (unsigned-reg)))
-
- (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum)
-
- (:temporary (:sc sse-reg) sse-temp1)
- (:temporary (:sc sse-reg) sse-temp2)
-
- (:generator 10
-
- ;; scale index by 4 (size-of single-float)
(inst shl index 2)
-
- ;; load
- (inst movups sse-temp1 (vect-ea vect1 index))
- (inst movups sse-temp2 (vect-ea vect2 index))
-
- ;; operate
- (inst addps sse-temp1 sse-temp2)
-
- ;; store
- (inst movups (vect-ea result index) sse-temp1)
- ))
-
-(define-vop (%sse-sub/simple-array-single-float-1)
- (:policy :fast-safe)
-
- (:args (result :scs (descriptor-reg))
- (vect1 :scs (descriptor-reg))
- (vect2 :scs (descriptor-reg))
- (index :scs (unsigned-reg)))
-
- (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum)
-
- (:temporary (:sc sse-reg) sse-temp1)
- (:temporary (:sc sse-reg) sse-temp2)
-
- (:generator 10
-
- ;; scale index by 4 (size-of single-float)
(inst shl index 2)
-
- ;; load
- (inst movups sse-temp1 (vect-ea vect1 index))
- (inst movups sse-temp2 (vect-ea vect2 index))
-
- ;; operate
- (inst subps sse-temp1 sse-temp2)
-
- ;; store
- (inst movups (vect-ea result index) sse-temp1)
- ))
-
-(define-vop (%sse-mul/simple-array-single-float-1)
- (:policy :fast-safe)
-
- (:args (result :scs (descriptor-reg))
- (vect1 :scs (descriptor-reg))
- (vect2 :scs (descriptor-reg))
- (index :scs (unsigned-reg)))
-
- (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum)
-
- (:temporary (:sc sse-reg) sse-temp1)
- (:temporary (:sc sse-reg) sse-temp2)
+ ;;(:guard (member :sse2 *backend-subfeatures*))
- (:generator 10
-
- ;; scale index by 4 (size-of single-float)
(inst shl index 2)
-
- ;; load
- (inst movups sse-temp1 (vect-ea vect1 index))
- (inst movups sse-temp2 (vect-ea vect2 index))
-
- ;; operate
- (inst mulps sse-temp1 sse-temp2)
-
- ;; store
- (inst movups (vect-ea result index) sse-temp1)
- ))
-
-(define-vop (%sse-div/simple-array-single-float-1)
- (:policy :fast-safe)
-
- (:args (result :scs (descriptor-reg))
+ (:args
+ (result :scs (descriptor-reg))
(vect1 :scs (descriptor-reg))
(vect2 :scs (descriptor-reg))
(index :scs (unsigned-reg)))
- (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum)
-
- (:temporary (:sc sse-reg) sse-temp1)
- (:temporary (:sc sse-reg) sse-temp2)
-
- (:generator 10
-
- ;; scale index by 4 (size-of single-float)
(inst shl index 2)
-
- ;; load
- (inst movups sse-temp1 (vect-ea vect1 index))
- (inst movups sse-temp2 (vect-ea vect2 index))
-
- ;; operate
- (inst divps sse-temp1 sse-temp2)
-
- ;; store
- (inst movups (vect-ea result index) sse-temp1)
- ))
-
-(define-vop (%sse-sqrt/simple-array-single-float-1)
- (:policy :fast-safe)
-
- (:args (result :scs (descriptor-reg))
- (vect1 :scs (descriptor-reg))
- (index :scs (unsigned-reg)))
+ (:arg-types
+ ,(intern (format nil "SIMPLE-ARRAY-~A" type))
+ ,(intern (format nil "SIMPLE-ARRAY-~A" type))
+ ,(intern (format nil "SIMPLE-ARRAY-~A" type))
+ fixnum)
- (:arg-types simple-array-single-float simple-array-single-float fixnum)
+ (:temporary (:sc sse-reg) sse-temp1)
+ (:temporary (:sc sse-reg) sse-temp2)
- (:temporary (:sc sse-reg) sse-temp1)
- (:temporary (:sc sse-reg) sse-temp2)
+ (:generator 10
- (:generator 10
+ ;; scale index to a byte offset: element size is 4 (shl 2) for single-float, 8 (shl 3) for double-float
+ (inst shl index ,(if (eq type 'double-float) 3 2))
- ;; scale index by 4 (size-of single-float)
- (inst shl index 2)
+ ;; load both operands with the unaligned move chosen for this element type
+ (inst ,mov-inst sse-temp1 (vect-ea vect1 index))
+ (inst ,mov-inst sse-temp2 (vect-ea vect2 index))
- ;; load
- (inst movups sse-temp1 (vect-ea vect1 index))
+ ;; operate: sse-temp1 <- sse-temp1 OP sse-temp2
+ (inst ,op-inst sse-temp1 sse-temp2)
- ;; operate
- (inst sqrtps sse-temp2 sse-temp1)
+ ;; store sse-temp1 into RESULT at the same byte offset
+ (inst ,mov-inst (vect-ea result index) sse-temp1)
+ )))
- ;; store
- (inst movups (vect-ea result index) sse-temp2)
- ))
+;; SINGLE-ARG SSE VOPs: one DEFINE-VOP template per (operation, element type) row; result = OP(vect1) at INDEX
+(loop for (op-name type mov-inst op-inst) in
+ '(
+ (recip single-float movups rcpps)
+ (rsqrt single-float movups rsqrtps)
+ (sqrt single-float movups sqrtps)
+ (sqrt double-float movupd sqrtpd)
+ )
+ do
+ ;; NOTE(review): the backquoted form below is only constructed and then discarded by DO; nothing evaluates or prints it here -- confirm how it is meant to be expanded
+ `(define-vop (,(intern (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type)))
+ (:policy :fast-safe)
+ ;;(:guard (member :sse2 *backend-subfeatures*))
-(define-vop (%sse-recip/simple-array-single-float-1)
- (:policy :fast-safe)
-
- (:args (result :scs (descriptor-reg))
+ (:args
+ (result :scs (descriptor-reg))
(vect1 :scs (descriptor-reg))
(index :scs (unsigned-reg)))
- (:arg-types simple-array-single-float simple-array-single-float fixnum)
-
- (:temporary (:sc sse-reg) sse-temp1)
- (:temporary (:sc sse-reg) sse-temp2)
-
- (:generator 10
-
- ;; scale index by 4 (size-of single-float)
- (inst shl index 2)
-
- ;; load
- (inst movups sse-temp1 (vect-ea vect1 index))
-
- ;; operate
- (inst rcpps sse-temp2 sse-temp1)
-
- ;; store
- (inst movups (vect-ea result index) sse-temp2)
- ))
-
-
-(define-vop (%sse-recip-sqrt/simple-array-single-float-1)
- (:policy :fast-safe)
-
- (:args (result :scs (descriptor-reg))
- (vect1 :scs (descriptor-reg))
- (index :scs (unsigned-reg)))
-
- (:arg-types simple-array-single-float simple-array-single-float fixnum)
-
- (:temporary (:sc sse-reg) sse-temp1)
- (:temporary (:sc sse-reg) sse-temp2)
-
- (:generator 10
+ (:arg-types
+ ,(intern (format nil "SIMPLE-ARRAY-~A" type))
+ ,(intern (format nil "SIMPLE-ARRAY-~A" type))
+ fixnum)
- ;; scale index by 4 (size-of single-float)
- (inst shl index 2)
+ (:temporary (:sc sse-reg) sse-temp1)
- ;; load
- (inst movups sse-temp1 (vect-ea vect1 index))
+ (:generator 10
- ;; operate
- (inst rsqrtps sse-temp2 sse-temp1)
+ ;; scale index to a byte offset: element size is 4 (shl 2) for single-float, 8 (shl 3) for double-float
+ (inst shl index ,(if (eq type 'double-float) 3 2))
- ;; store
- (inst movups (vect-ea result index) sse-temp2)
- ))
+ ;; load the operand with the unaligned move chosen for this element type
+ (inst ,mov-inst sse-temp1 (vect-ea vect1 index))
+ ;; operate in place: the instruction takes DST and SRC, both are sse-temp1 here
+ (inst ,op-inst sse-temp1 sse-temp1)
+ ;; store sse-temp1 into RESULT at the same byte offset
+ (inst ,mov-inst (vect-ea result index) sse-temp1)
+ )))
Index: sb-simd/generate-sse-instructions.lisp
diff -u sb-simd/generate-sse-instructions.lisp:1.3 sb-simd/generate-sse-instructions.lisp:1.4
--- sb-simd/generate-sse-instructions.lisp:1.3 Mon Aug 8 12:59:52 2005
+++ sb-simd/generate-sse-instructions.lisp Mon Aug 8 15:33:29 2005
@@ -7,49 +7,16 @@
TODO:
-CMPPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30
-CMPPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
-CMPSD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37
-CMPSS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40
-
FXRSTOR. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 121
FXSAVE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 124
-HADDPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 126
-HADDPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 129
-HSUBPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 132
-HSUBPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 135
-
-LDDQU. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 138
LDMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 140
-MASKMOVDQU . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 142
-
-MOVD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 173
-MOVDDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 176
MOVDQ2Q . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 178
-MOVHLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 184
-
-MOVLHPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 190
-
-MOVMSKPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 196
-MOVMSKPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 198
-MOVNTDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 200
-MOVNTPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 202
-MOVNTPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 204
-
MOVQ2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 208
-MOVSHDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 213
-MOVSLDUP . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 215
-
-PEXTRW. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 284
-PINSRW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 286
-
-PSHUFD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 314
-PSHUFHW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 317
-PSHUFLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 320
+(ib-forms:)
PSLLD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 323
PSLLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 326
PSLLQ. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 328
@@ -61,8 +28,6 @@
PSRLQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 344
PSRLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 347
-SHUFPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 392
-SHUFPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 395
STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 410
@@ -88,6 +53,8 @@
(andnps #x0F #x55)
(andps #x0F #x54)
(divps #x0F #x5E)
+ (haddps #xF2 #x0F #x7C)
+ (hsubps #xF2 #x0F #x7D)
(maxps #x0F #x5F)
(minps #x0F #x5D)
(mulps #x0F #x59)
@@ -106,10 +73,12 @@
(andnpd #x66 #x0F #x55)
(andpd #x66 #x0F #x54)
(divpd #x66 #x0F #x5E)
+ (haddpd #x66 #x0F #x7C)
+ (hsubpd #x66 #x0F #x7D)
(maxpd #x66 #x0F #x5F)
(minpd #x66 #x0F #x5D)
(mulpd #x66 #x0F #x59)
- (orps #x66 #x0F #x56)
+ (orpd #x66 #x0F #x56)
(sqrtpd #x66 #x0F #x51)
(subpd #x66 #x0F #x5C)
(unpckhpd #x66 #x0F #x15)
@@ -238,7 +207,20 @@
(cvttps2pi #x0F #x2C)
(cvttsd2si #xF2 #x0F #x2C)
(cvttss2si #xF3 #x0F #x2C)
-
+
+ ;; misc
+ (lddqu #xF2 #x0F #xF0)
+ (maskmovdqu #x66 #x0F #xF7)
+ (movddup #xF2 #x0F #x12)
+ (movhlps #x0F #x12)
+ (movlhps #x0F #x16)
+ (movmskpd #x66 #x0F #x50)
+ (movmskps #x0F #x50)
+ (movntdq #x66 #x0F #XE7)
+ (movntpd #x66 #x0F #x2B)
+ (movntps #x0F #x2B)
+ (movshdup #xF3 #x0F #x16)
+ (movsldup #xF3 #x0F #x12)
)
do
(format stream "~S~%~%"
@@ -247,11 +229,63 @@
,@(emit-ops ops)
(emit-ea segment src (reg-tn-encoding dst))))))
+
+ ;; INSTRUCTIONS WITH /r IB8: each row is (mnemonic . opcode bytes); the emitter appends a one-byte immediate
+ (dolist (spec
+ '(
+ (pextrw #x66 #x0F #xC5)
+ (pinsrw #x66 #x0F #xC4)
+ ;; integer shuffles
+ (pshufd #x66 #x0F #x70)
+ (pshufhw #xF3 #x0F #x70)
+ (pshuflw #xF2 #x0F #x70)
+ ;; floating-point shuffles
+ (shufpd #x66 #x0F #xC6)
+ (shufps #x0F #xC6)
+
+ ))
+ (destructuring-bind (mnemonic . opcode-bytes) spec
+ (format stream "~S~%~%"
+ `(define-instruction ,(intern (symbol-name mnemonic)) (segment dst src byte)
+ (:emitter
+ ,@(emit-ops opcode-bytes)
+ (emit-ea segment src (reg-tn-encoding dst))
+ (emit-sized-immediate segment :byte byte)
+ )))))
+
+ ;; COMPARE: each row is (mnemonic . opcode bytes); the emitter maps the COND keyword to the imm8 predicate, erroring on unknown keywords
+ (loop for (inst . ops) in
+ '(
+ (cmppd #x66 #x0F #xC2)
+ (cmpps #x0F #xC2)
+ (cmpsd #xF2 #x0F #xC2)
+ (cmpss #xF3 #x0F #xC2)
+ )
+ do
+ (format stream "~S~%~%"
+ `(define-instruction ,(intern (symbol-name inst)) (segment dst src cond)
+ (:emitter
+ ,@(emit-ops ops)
+ (emit-ea segment src (reg-tn-encoding dst))
+ (emit-sized-immediate segment :byte (or (cdr (assoc cond
+ '((:eq . #b000) (:e . #b000) (:z . #b000)
+ (:l . #b001) (:nge . #b001) ; NOTE(review): :nge/:ng/:ge/:g reuse the LT/LE/NLT/NLE predicates, which differ on unordered (NaN) operands -- confirm
+ (:le . #b010) (:ng . #b010)
+ (:unord . #b011)
+ (:ne . #b100) (:nz . #b100)
+ (:nl . #b101) (:ge . #b101)
+ (:nle . #b110) (:g . #b110)
+ (:ord . #b111)
+ ))) (error "unknown SSE compare condition: ~S" cond)))
+ ))))
+
;; MOVES
(loop for (inst ops-m2r ops-r2m) in
'(
(movapd (#x66 #x0F #x28) (#x66 #x0F #x29))
(movaps (#x0F #x28) (#x0F #x29))
+
+ (movd (#x66 #x0F #x6E) (#x66 #x0F #x7E))
(movdqa (#x66 #x0F #x6F) (#x66 #x0F #x7F))
(movdqu (#xF3 #x0F #x6F) (#xF3 #x0F #x7F))
More information about the Sb-simd-cvs
mailing list