[sb-simd-cvs] CVS update: sb-simd/push-simd-features.lisp sb-simd/expand-parse-operand-temp-count.lisp sb-simd/test-seq.lisp sb-simd/sse-seq.lisp sb-simd/load.lisp

Risto Laakso rlaakso at common-lisp.net
Fri Aug 12 14:09:55 UTC 2005


Update of /project/sb-simd/cvsroot/sb-simd
In directory common-lisp.net:/tmp/cvs-serv3312

Modified Files:
	test-seq.lisp sse-seq.lisp load.lisp 
Added Files:
	push-simd-features.lisp expand-parse-operand-temp-count.lisp 
Log Message:

Date: Fri Aug 12 16:09:53 2005
Author: rlaakso





Index: sb-simd/test-seq.lisp
diff -u sb-simd/test-seq.lisp:1.1 sb-simd/test-seq.lisp:1.2
--- sb-simd/test-seq.lisp:1.1	Fri Aug 12 13:55:39 2005
+++ sb-simd/test-seq.lisp	Fri Aug 12 16:09:53 2005
@@ -3,14 +3,7 @@
 (declaim (optimize (speed 3) (safety 0) (space 0) (debug 0)))
 
 (defun sse-seq= (seq1 seq2)
-  (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2))
-  (multiple-value-bind (256blocks rest) (truncate (length seq1) (floor (log (/ 256 8) 2)))
-    (declare (ignore rest))
-    (and (= (sb-sys:%primitive sb-vm::%sse-seq= seq1 seq2) 0)
-	 (loop for equal = t
-	       for i from (* 256blocks 32) below (length seq1) 
-	       when (/= (aref seq1 i) (aref seq2 i)) do (setq equal nil)
-	       finally (return equal)))))
+  (= (sb-sys:%primitive sb-vm::%sse-seq= seq1 seq2) 0))
 
 (defun seq= (seq1 seq2)
   (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2))
@@ -22,11 +15,11 @@
 	     finally (return equal))))
 
 
-(defun test-seq (&optional (test-count 100000))
-  (let ((arr1 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0))
-	(arr2 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0))
-	(arr3 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0))
-	(arr4 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0))
+(defun test-seq (&optional (test-count 50000))
+  (let ((arr1 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0))
+	(arr2 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0))
+	(arr3 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0))
+	(arr4 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0))
 	res)
     
     (loop for i from 0 below (length arr1) 
@@ -37,19 +30,19 @@
 		   ))
 
     (setf (aref arr3 1200) (mod (1+ (aref arr3 1200)) 256)
-	  (aref arr4 256000) (mod (1+ (aref arr4 256000)) 256))
+	  (aref arr4 (- (length arr4) 2)) (mod (1+ (aref arr4 (- (length arr4) 2))) 256))
 
 ;;    (time (dotimes (i 100000) (sse-seq= arr1 arr2)))
 ;;    (time (dotimes (i #.(/ 100000 30)) (seq= arr1 arr2)))
 
     (format t "; seq= a1 a2~%")
-    (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr2)))))
+    (time-sample-form #'(lambda () (dotimes (i (truncate test-count 15)) (setf res (seq= arr1 arr2)))))
 
     (format t "; seq= a1 a3~%")
-    (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr3)))))
+    (time-sample-form #'(lambda () (dotimes (i (truncate test-count 15)) (setf res (seq= arr1 arr3)))))
 
     (format t "; seq= a2 a4~%")
-    (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr2 arr4)))))
+    (time-sample-form #'(lambda () (dotimes (i (truncate test-count 15)) (setf res (seq= arr2 arr4)))))
 
 
     (format t "; sse-seq= a1 a2~%")


Index: sb-simd/sse-seq.lisp
diff -u sb-simd/sse-seq.lisp:1.1 sb-simd/sse-seq.lisp:1.2
--- sb-simd/sse-seq.lisp:1.1	Fri Aug 12 13:55:39 2005
+++ sb-simd/sse-seq.lisp	Fri Aug 12 16:09:53 2005
@@ -29,15 +29,15 @@
   (:TEMPORARY (:SC XMM-REG) X4)
   (:TEMPORARY (:SC XMM-REG) X5)
 
-;;  (:TEMPORARY (:SC unsigned-reg :offset edx-offset) edx)
-
+  (:TEMPORARY (:SC unsigned-reg :offset eax-offset :to (:result 0)) temp1)
+  (:TEMPORARY (:SC unsigned-reg :offset edx-offset) temp2)
   (:TEMPORARY (:SC unsigned-reg :offset ebx-offset) index)
   (:TEMPORARY (:SC unsigned-reg :offset ecx-offset) length)
 
   (:GENERATOR 10
 
     (let ((top (gen-label))
-;;	  (top2 (gen-label))
+	  (top2 (gen-label))
 	  (length-ok (gen-label))
 	  (fail (gen-label))
 	  (the-end (gen-label))
@@ -51,8 +51,7 @@
 	      (inst jmp :eq length-ok)
 
 	      ;; not same length, fail
-	      (inst mov result -1)
-	      (inst jmp end)
+	      (inst jmp fail)
 
 	      (emit-label length-ok)
 
@@ -66,8 +65,8 @@
 	      (inst xor index index)
 
 	      ;; zero eq-regs
-	      (inst pxor x4 x4)
-	      (inst pxor x5 x5)
+;;	      (inst pxor x4 x4)
+;;	      (inst pxor x5 x5)
 
 	      (emit-label top)
 
@@ -75,6 +74,9 @@
 	      (inst movdqu x0 (vect-ea seq1 index :xmmword))
 	      (inst movdqu x1 (vect-ea seq2 index :xmmword))
 
+	      (inst pxor x4 x4)
+	      (inst pxor x5 x5)
+
 	      ;; load second blocks
 	      (inst movdqu x2 
 		    (make-ea :xmmword :base seq1 :index index 
@@ -91,9 +93,18 @@
 	      ;; add index
 	      (inst add index 32)
 
-	      ;; or bits to eq-regs (if not eq, some bits will be nonzero)
-	      (inst por x4 x0)
-	      (inst por x5 x2)
+	      ;; check for non-equality
+	      (inst pcmpeqd x4 x0)
+	      (inst pcmpeqd x5 x2)
+
+	      (inst pmovmskb temp1 x4)
+	      (inst pmovmskb temp2 x5)
+
+	      (inst cmp temp1 #x0000FFFF)
+	      (inst jmp :ne fail)
+
+	      (inst cmp temp2 #x0000FFFF)
+	      (inst jmp :ne fail)
 
 	      ;; loop
 	      (inst dec length)
@@ -102,35 +113,46 @@
 
 	      ;; all 256bit blocks done
 
-	      ;; or each 32bit word from x4 to x5
-	      (inst por x4 x5)
-	      (inst movdqa x0 x4)
 
-	      (inst psrldq-ib x4 4)  ;; this is number of bytes, not bits
-	      (inst por x0 x4)
+	      ;; check remaining bytes
+	      (loadw length seq1 vector-length-slot other-pointer-lowtag)
+	      (inst shr length 2)
+ 	      (inst and length (1- (/ 256 8)))
+
+	      ;; no bytes left ?
+	      (inst test length length)
+	      (inst jmp :z end)
 
-	      (inst psrldq-ib x4 4)
-	      (inst por x0 x4)
+	      (inst xor temp1 temp1)
+	      (inst xor temp2 temp2)
 
-	      (inst psrldq-ib x4 4)
-	      (inst por x0 x4)
+	      (emit-label top2)
 
-	      ;; now low 32bits of x0 will be non-zero if seq's not equal
+	      ;; test bytes
+	      (inst movzx temp1 (vect-ea seq1 index :byte))
+	      (inst movzx temp2 (vect-ea seq2 index :byte))
+	      (inst xor temp1 temp2)
+	      (inst inc index)
 
-	      (inst movd result x0)
+	      ;; if not zero, fail
+	      (inst test temp1 temp1)
+	      (inst jmp :nz fail)
+
+	      ;; loop
+	      (inst dec length)
+	      (inst jmp :nz top2)
 
 	      ;; end
 	      (emit-label end)
 
-	      (inst test result result)
-	      (inst jmp :nz fail)
-
 	      (inst mov result (fixnumize 0))
 	      (inst jmp the-end)
 
+              ;; fail
 	      (emit-label fail)
 	      (inst mov result (fixnumize 1))
-	      
+
+	      ;; the-end
 	      (emit-label the-end)
 
 	      )))


Index: sb-simd/load.lisp
diff -u sb-simd/load.lisp:1.4 sb-simd/load.lisp:1.5
--- sb-simd/load.lisp:1.4	Fri Aug 12 13:55:39 2005
+++ sb-simd/load.lisp	Fri Aug 12 16:09:53 2005
@@ -14,6 +14,7 @@
 (if t
     (progn
       (load (compile-file "detect-simd.lisp"))
+      (load (compile-file "expand-parse-operand-temp-count.lisp"))
       (load (compile-file "timing.lisp"))
       (load (compile-file "sse-seq.lisp"))
       (load (compile-file "test-seq.lisp"))




More information about the Sb-simd-cvs mailing list