[Git][cmucl/cmucl][rtoy-amd64-p1] 8 commits: Add defvar *num-fixups*

Raymond Toy gitlab at common-lisp.net
Sun Apr 12 02:26:26 UTC 2020



Raymond Toy pushed to branch rtoy-amd64-p1 at cmucl / cmucl


Commits:
67500fc6 by Raymond Toy at 2020-04-06T17:05:02-07:00
Add defvar *num-fixups*

- - - - -
962f709d by Raymond Toy at 2020-04-06T17:05:18-07:00
Use #+double-double

- - - - -
5d19a2f6 by Raymond Toy at 2020-04-06T17:06:20-07:00
Update base-char-reg for unicode

Just porting over the x86 unicode base-char-reg stuff.  Base-chars
don't fit into byte regs anymore.

- - - - -
6a88e2a0 by Raymond Toy at 2020-04-06T17:06:41-07:00
Use movzx when loading type codes

- - - - -
bb504616 by Raymond Toy at 2020-04-06T17:07:52-07:00
Merge x86 char stuff, basically updating for unicode.

Also add _N"" in some places to make the difference between x86 and
amd64 less.

- - - - -
4ce27917 by Raymond Toy at 2020-04-06T17:08:51-07:00
Update string vops for unicode.

Mostly just copying the simple-array-unsigned-byte-16 vops to the
string vops since strings hold 16-bit chars.

- - - - -
b1d28406 by Raymond Toy at 2020-04-06T17:09:16-07:00
Port closure-tramp and undefined-tramp from x86

- - - - -
894977db by Raymond Toy at 2020-04-11T19:25:47-07:00
Import x86/lfoat-sse2.lisp

Modify slightly to replace uses of ebp with rbp and a few other
changes to use a 64-bit reg instead of a 32-bit reg.

- - - - -


8 changed files:

- src/assembly/amd64/assem-rtns.lisp
- src/compiler/amd64/array.lisp
- src/compiler/amd64/char.lisp
- + src/compiler/amd64/float-sse2.lisp
- src/compiler/amd64/macros.lisp
- src/compiler/amd64/vm.lisp
- src/compiler/generic/objdef.lisp
- src/tools/cross-scripts/cross-x86-amd64.lisp


Changes:

=====================================
src/assembly/amd64/assem-rtns.lisp
=====================================
@@ -64,7 +64,8 @@
   (inst lea rdi (make-ea :qword :base rbx :disp (- word-bytes)))
   (inst rep)
   (inst movs :qword)
-
+  (inst cld)
+  
   ;; Restore the count.
   (inst mov rcx rdx)
 
@@ -159,6 +160,7 @@
   (inst sub rsi word-bytes)
   (inst rep)
   (inst movs :qword)
+  (inst cld)
 
   ;; Load the register arguments carefully.
   (loadw rdx rbp-tn -1)
@@ -274,3 +276,23 @@
 
   (inst jmp (make-ea :byte :base block
 		     :disp (* unwind-block-entry-pc-slot word-bytes))))
+
+#+assembler
+(define-assembly-routine (closure-tramp
+			  (:return-style :none))
+                         ()
+  (loadw rax-tn rax-tn fdefn-function-slot other-pointer-type)
+  (inst jmp (make-ea :qword :base rax-tn
+		     :disp (- (* closure-function-slot word-bytes)
+			      function-pointer-type))))
+
+#+assembler
+(define-assembly-routine (undefined-tramp
+			  (:return-style :none))
+                         ()
+  (let ((error (generate-error-code nil undefined-symbol-error
+				    (make-random-tn :kind :normal
+						    :sc (sc-or-lose 'descriptor-reg c::*backend*)
+						    :offset 0))))
+    (inst jmp error)
+    (inst ret)))


=====================================
src/compiler/amd64/array.lisp
=====================================
@@ -1234,20 +1234,13 @@
   (:args (object :scs (descriptor-reg))
 	 (index :scs (unsigned-reg)))
   (:arg-types simple-string positive-fixnum)
-  (:temporary (:sc unsigned-reg ; byte-reg
-		   :offset rax-offset ; al-offset
-		   :target value
-		   :from (:eval 0) :to (:result 0))
-	      rax)
-  (:ignore rax)
   (:results (value :scs (base-char-reg)))
   (:result-types base-char)
   (:generator 5
-    (inst mov al-tn
-	  (make-ea :byte :base object :index index :scale 1
+    (inst movzx value
+	  (make-ea :word :base object :index index :scale 2
 		   :disp (- (* vector-data-offset word-bytes)
-			    other-pointer-type)))
-    (move value al-tn)))
+			    other-pointer-type)))))
 
 (define-vop (data-vector-ref-c/simple-string)
   (:translate data-vector-ref)
@@ -1255,18 +1248,13 @@
   (:args (object :scs (descriptor-reg)))
   (:info index)
   (:arg-types simple-string (:constant (signed-byte 30)))
-  (:temporary (:sc unsigned-reg :offset rax-offset :target value
-		   :from (:eval 0) :to (:result 0))
-	      rax)
-  (:ignore rax)
   (:results (value :scs (base-char-reg)))
   (:result-types base-char)
   (:generator 4
-    (inst mov al-tn
-	  (make-ea :byte :base object
-		   :disp (- (+ (* vector-data-offset word-bytes) index)
-			    other-pointer-type)))
-    (move value al-tn)))
+    (inst movzx value
+	  (make-ea :word :base object
+		   :disp (- (+ (* vector-data-offset word-bytes) (* 2 index))
+			    other-pointer-type)))))
 
 
 (define-vop (data-vector-set/simple-string)
@@ -1276,14 +1264,18 @@
 	 (index :scs (unsigned-reg) :to (:eval 0))
 	 (value :scs (base-char-reg)))
   (:arg-types simple-string positive-fixnum base-char)
-  (:results (result :scs (base-char-reg)))
+   (:temporary (:sc unsigned-reg :offset rax-offset :target result
+		   :from (:argument 2) :to (:result 0))
+	      rax)
+ (:results (result :scs (base-char-reg)))
   (:result-types base-char)
   (:generator 5 
-    (inst mov (make-ea :byte :base object :index index :scale 1
+    (move rax value)
+    (inst mov (make-ea :word :base object :index index :scale 2
 		       :disp (- (* vector-data-offset word-bytes)
 				other-pointer-type))
-	  value)
-    (move result value)))
+	  ax-tn)
+    (move result rax)))
 
 
 (define-vop (data-vector-set/simple-string-c)
@@ -1293,14 +1285,19 @@
 	 (value :scs (base-char-reg)))
   (:info index)
   (:arg-types simple-string (:constant (signed-byte 30)) base-char)
+  (:temporary (:sc unsigned-reg :offset rax-offset :target result
+		   :from (:argument 1) :to (:result 0))
+	      rax)
   (:results (result :scs (base-char-reg)))
   (:result-types base-char)
   (:generator 4
-   (inst mov (make-ea :byte :base object
-		      :disp (- (+ (* vector-data-offset word-bytes) index)
-			       other-pointer-type))
-	 value)
-   (move result value)))
+    (move rax value)
+    (inst mov (make-ea :word :base object
+		       :disp (- (+ (* vector-data-offset word-bytes)
+				   (* 2 index))
+				other-pointer-type))
+	  ax-tn)
+    (move result rax)))
 
 
 ;;; signed-byte-8


=====================================
src/compiler/amd64/char.lisp
=====================================
@@ -30,41 +30,31 @@
 ;;; Move a tagged char to an untagged representation.
 ;;;
 (define-vop (move-to-base-char)
-  (:args (x :scs (any-reg control-stack) :target al))
-  (:temporary (:sc byte-reg :offset al-offset
-		   :from (:argument 0) :to (:eval 0)) al)
-  (:ignore al)
-  (:temporary (:sc byte-reg :offset ah-offset :target y
-		   :from (:argument 0) :to (:result 0)) ah)
-  (:results (y :scs (base-char-reg base-char-stack)))
-  (:note "character untagging")
+  (:args (x :scs (any-reg control-stack) :target y))
+  (:results (y :scs (base-char-reg)))
+  (:note _N"character untagging")
   (:generator 1
-    (move rax-tn x)
-    (move y ah)))
+    (move y x)
+    (inst shr y type-bits)))
 ;;;
 (define-move-vop move-to-base-char :move
-  (any-reg control-stack) (base-char-reg base-char-stack))
+  (any-reg control-stack) (base-char-reg))
 
 
 ;;; Move an untagged char to a tagged representation.
 ;;;
 (define-vop (move-from-base-char)
-  (:args (x :scs (base-char-reg base-char-stack) :target ah))
-  (:temporary (:sc byte-reg :offset al-offset :target y
-		   :from (:argument 0) :to (:result 0)) al)
-  (:temporary (:sc byte-reg :offset ah-offset
-		   :from (:argument 0) :to (:result 0)) ah)
-  (:results (y :scs (any-reg descriptor-reg control-stack)))
-  (:note "character tagging")
+  (:args (x :scs (base-char-reg base-char-stack) :target y))
+  (:results (y :scs (any-reg descriptor-reg)))
+  (:note _N"character tagging")
   (:generator 1
-    (move ah x)				; maybe move char byte
-    (inst mov al base-char-type)	; #x86 to type bits
-    (inst and rax-tn #xffff)		; remove any junk bits
-    (move y rax-tn)))
+    (move y x)
+    (inst shl y type-bits)
+    (inst or y base-char-type)))
 
 ;;;
 (define-move-vop move-from-base-char :move
-  (base-char-reg base-char-stack) (any-reg descriptor-reg control-stack))
+  (base-char-reg base-char-stack) (any-reg descriptor-reg))
 
 ;;; Move untagged base-char values.
 ;;;
@@ -74,7 +64,7 @@
 	    :load-if (not (location= x y))))
   (:results (y :scs (base-char-reg base-char-stack)
 	       :load-if (not (location= x y))))
-  (:note "character move")
+  (:note _N"character move")
   (:effects)
   (:affected)
   (:generator 0
@@ -92,15 +82,13 @@
 	 (fp :scs (any-reg)
 	     :load-if (not (sc-is y base-char-reg))))
   (:results (y))
-  (:note "character arg move")
+  (:note _N"character arg move")
   (:generator 0
     (sc-case y
       (base-char-reg
        (move y x))
       (base-char-stack
-       (inst mov
-	     (make-ea :byte :base fp :disp (- (* (1+ (tn-offset y)) word-bytes)))
-	     x)))))
+       (storew x fp (- (1+ (tn-offset y))))))))
 ;;;
 (define-move-vop move-base-char-argument :move-argument
   (any-reg base-char-reg) (base-char-reg))
@@ -119,29 +107,22 @@
 (define-vop (char-code)
   (:translate char-code)
   (:policy :fast-safe)
-  (:args (ch :scs (base-char-reg base-char-stack)))
+  (:args (ch :scs (base-char-reg base-char-stack) :target res))
   (:arg-types base-char)
   (:results (res :scs (unsigned-reg)))
   (:result-types positive-fixnum)
   (:generator 1
-	      ;; ah to dh are not addressable when a rex prefix is used
-	      ;; The high 32 bits of doubleword operands are
-	      ;; zero-extended to 64-bits.
-    (inst movzx (64-bit-to-32-bit-tn res) ch)))
+    (move res ch)))
 
 (define-vop (code-char)
   (:translate code-char)
   (:policy :fast-safe)
-  (:args (code :scs (unsigned-reg unsigned-stack) :target rax))
+  (:args (code :scs (unsigned-reg control-stack) :target res))
   (:arg-types positive-fixnum)
-  (:temporary (:sc unsigned-reg :offset rax-offset :target res
-		   :from (:argument 0) :to (:result 0))
-	      rax)
   (:results (res :scs (base-char-reg)))
   (:result-types base-char)
   (:generator 1
-    (move rax code)
-    (move res al-tn)))
+    (move res code)))
 
 
 ;;; Comparison of base-chars.
@@ -155,7 +136,7 @@
   (:conditional)
   (:info target not-p)
   (:policy :fast-safe)
-  (:note "inline comparison")
+  (:note _N"inline comparison")
   (:variant-vars condition not-condition)
   (:generator 3
     (inst cmp x y)
@@ -179,7 +160,7 @@
   (:conditional)
   (:info target not-p y)
   (:policy :fast-safe)
-  (:note "inline comparison")
+  (:note _N"inline comparison")
   (:variant-vars condition not-condition)
   (:generator 2
     (inst cmp x (char-code y))


=====================================
src/compiler/amd64/float-sse2.lisp
=====================================
@@ -0,0 +1,2391 @@
+;;; -*- Mode: LISP; Syntax: Common-Lisp; Base: 10; Package: x86 -*-
+;;;
+;;; **********************************************************************
+;;; This code was written as part of the CMU Common Lisp project at
+;;; Carnegie Mellon University, and has been placed in the public domain.
+;;; If you want to use this code or any part of CMU Common Lisp, please contact
+;;; Scott Fahlman or slisp-group at cs.cmu.edu.
+;;;
+(ext:file-comment
+  "$Header: src/compiler/x86/float-sse2.lisp $")
+;;;
+;;; **********************************************************************
+;;;
+;;; This file contains floating point support for the x86.
+;;;
+
+(in-package :amd64)
+(intl:textdomain "cmucl-sse2")
+
+;;; Popping the FP stack.
+;;;
+;;; The default is to use a store and pop, fstp fr0.
+;;; For the AMD Athlon, using ffreep fr0 is faster.
+;;;
+(defun fp-pop ()
+  (if (backend-featurep :athlon)
+      (inst ffreep fr0-tn)
+      (inst fstp fr0-tn)))
+
+
+(macrolet ((ea-for-xf-desc (tn slot)
+	     `(make-ea
+	       :dword :base ,tn
+	       :disp (- (* ,slot vm:word-bytes) vm:other-pointer-type))))
+  (defun ea-for-sf-desc (tn)
+    (ea-for-xf-desc tn vm:single-float-value-slot))
+  (defun ea-for-df-desc (tn)
+    (ea-for-xf-desc tn vm:double-float-value-slot))
+  #+long-float
+  (defun ea-for-lf-desc (tn)
+    (ea-for-xf-desc tn vm:long-float-value-slot))
+  ;; Complex floats
+  (defun ea-for-csf-real-desc (tn)
+    (ea-for-xf-desc tn vm:complex-single-float-real-slot))
+  (defun ea-for-csf-imag-desc (tn)
+    (ea-for-xf-desc tn vm:complex-single-float-imag-slot))
+  (defun ea-for-cdf-real-desc (tn)
+    (ea-for-xf-desc tn vm:complex-double-float-real-slot))
+  (defun ea-for-cdf-imag-desc (tn)
+    (ea-for-xf-desc tn vm:complex-double-float-imag-slot))
+  #+long-float
+  (defun ea-for-clf-real-desc (tn)
+    (ea-for-xf-desc tn vm:complex-long-float-real-slot))
+  #+long-float
+  (defun ea-for-clf-imag-desc (tn)
+    (ea-for-xf-desc tn vm:complex-long-float-imag-slot))
+  #+double-double
+  (defun ea-for-cddf-real-hi-desc (tn)
+    (ea-for-xf-desc tn vm:complex-double-double-float-real-hi-slot))
+  #+double-double
+  (defun ea-for-cddf-real-lo-desc (tn)
+    (ea-for-xf-desc tn vm:complex-double-double-float-real-lo-slot))
+  #+double-double
+  (defun ea-for-cddf-imag-hi-desc (tn)
+    (ea-for-xf-desc tn vm:complex-double-double-float-imag-hi-slot))
+  #+double-double
+  (defun ea-for-cddf-imag-lo-desc (tn)
+    (ea-for-xf-desc tn vm:complex-double-double-float-imag-lo-slot))
+  )
+
+(macrolet ((ea-for-xf-stack (tn kind)
+	     `(make-ea
+	       :dword :base rbp-tn
+	       :disp (- (* (+ (tn-offset ,tn)
+			      (ecase ,kind (:single 1) (:double 2) (:long 3)))
+			 vm:word-bytes)))))
+  (defun ea-for-sf-stack (tn)
+    (ea-for-xf-stack tn :single))
+  (defun ea-for-df-stack (tn)
+    (ea-for-xf-stack tn :double))
+  #+long-float
+  (defun ea-for-lf-stack (tn)
+    (ea-for-xf-stack tn :long)))
+
+;;; Complex float stack EAs
+(macrolet ((ea-for-cxf-stack (tn kind slot &optional base)
+	     `(make-ea
+	       :qword :base ,base
+	       :disp (- (* (+ (tn-offset ,tn)
+			      (* (ecase ,kind
+				   (:single 1)
+				   (:double 2)
+				   (:long 3))
+				 (ecase ,slot
+				   ;; We want the real part to be at
+				   ;; the lower address!
+				   (:real 2)
+				   (:imag 1)
+				   (:real-hi 1)
+				   (:real-lo 2)
+				   (:imag-hi 3)
+				   (:imag-lo 4))))
+			 vm:word-bytes)))))
+  (defun ea-for-csf-real-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :single :real base))
+  (defun ea-for-csf-imag-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :single :imag base))
+  (defun ea-for-cdf-real-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :double :real base))
+  (defun ea-for-cdf-imag-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :double :imag base))
+  ;;
+  #+long-float
+  (defun ea-for-clf-real-stack (tn &optional (base ebp-tn))
+    (ea-for-cxf-stack tn :long :real base))
+  #+long-float
+  (defun ea-for-clf-imag-stack (tn &optional (base ebp-tn))
+    (ea-for-cxf-stack tn :long :imag base))
+
+  #+double-double
+  (defun ea-for-cddf-real-hi-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :double :real-hi base))
+  #+double-double
+  (defun ea-for-cddf-real-lo-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :double :real-lo base))
+  #+double-double
+  (defun ea-for-cddf-imag-hi-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :double :imag-hi base))
+  #+double-double
+  (defun ea-for-cddf-imag-lo-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :double :imag-lo base))
+  )
+
+;;; The x86 can't store a long-float to memory without popping the
+;;; stack and marking a register as empty, so it is necessary to
+;;; restore the register from memory.
+(defun store-long-float (ea)
+   (inst fstpl ea)
+   (inst fldl ea))
+
+
+;;;; Move functions:
+
+;;; x is source, y is destination
+(define-move-function (load-single 2) (vop x y)
+  ((single-stack) (single-reg))
+  (inst movss y (ea-for-sf-stack x)))
+
+(define-move-function (store-single 2) (vop x y)
+  ((single-reg) (single-stack))
+  (inst movss (ea-for-sf-stack y) x))
+
+(define-move-function (load-double 2) (vop x y)
+  ((double-stack) (double-reg))
+  (inst movsd y (ea-for-df-stack x)))
+
+(define-move-function (store-double 2) (vop x y)
+  ((double-reg) (double-stack))
+  (inst movsd (ea-for-df-stack y) x))
+
+#+long-float
+(define-move-function (load-long 2) (vop x y)
+  ((long-stack) (long-reg))
+  (with-empty-tn at fp-top(y)
+     (inst fldl (ea-for-lf-stack x))))
+
+#+long-float
+(define-move-function (store-long 2) (vop x y)
+  ((long-reg) (long-stack))
+  (cond ((zerop (tn-offset x))
+	 (store-long-float (ea-for-lf-stack y)))
+	(t
+	 (inst fxch x)
+	 (store-long-float (ea-for-lf-stack y))
+	 ;; This may not be necessary as ST0 is likely invalid now.
+	 (inst fxch x))))
+
+(define-move-function (load-fp-constant 2) (vop x y)
+  ((fp-constant) (single-reg double-reg))
+  (let ((value (c::constant-value (c::tn-leaf x))))
+    (cond ((and (zerop value)
+		(= (float-sign value) 1))
+	   (sc-case y
+	     (single-reg (inst xorps y y))
+	     (double-reg (inst xorpd y y))))
+	  (t
+	   (warn (intl:gettext "Ignoring bogus i387 Constant ~a") value)))))
+
+
+;;;; Complex float move functions
+
+(defun complex-single-reg-real-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg *backend*)
+		  :offset (tn-offset x)))
+(defun complex-single-reg-imag-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg *backend*)
+		  :offset (1+ (tn-offset x))))
+
+(defun complex-double-reg-real-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
+		  :offset (tn-offset x)))
+(defun complex-double-reg-imag-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
+		  :offset (1+ (tn-offset x))))
+
+#+long-float
+(defun complex-long-reg-real-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg *backend*)
+		  :offset (tn-offset x)))
+#+long-float
+(defun complex-long-reg-imag-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg *backend*)
+		  :offset (1+ (tn-offset x))))
+
+#+double-double
+(progn
+(defun complex-double-double-reg-real-hi-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
+		  :offset (tn-offset x)))
+(defun complex-double-double-reg-real-lo-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
+		  :offset (+ 1 (tn-offset x))))
+(defun complex-double-double-reg-imag-hi-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
+		  :offset (+ 2 (tn-offset x))))
+(defun complex-double-double-reg-imag-lo-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
+		  :offset (+ 3 (tn-offset x))))
+)
+;;; x is source, y is destination
+(define-move-function (load-complex-single 2) (vop x y)
+  ((complex-single-stack) (complex-single-reg))
+  (inst movlps y (ea-for-csf-real-stack x)))
+
+(define-move-function (store-complex-single 2) (vop x y)
+  ((complex-single-reg) (complex-single-stack))
+  (inst movlps (ea-for-csf-real-stack y) x))
+
+(define-move-function (load-complex-double 2) (vop x y)
+  ((complex-double-stack) (complex-double-reg))
+  (inst movupd y (ea-for-cdf-real-stack x)))
+
+(define-move-function (store-complex-double 2) (vop x y)
+  ((complex-double-reg) (complex-double-stack))
+  (inst movupd (ea-for-cdf-real-stack y) x))
+
+#+long-float
+(define-move-function (load-complex-long 2) (vop x y)
+  ((complex-long-stack) (complex-long-reg))
+  (let ((real-tn (complex-long-reg-real-tn y)))
+    (with-empty-tn at fp-top(real-tn)
+      (inst fldl (ea-for-clf-real-stack x))))
+  (let ((imag-tn (complex-long-reg-imag-tn y)))
+    (with-empty-tn at fp-top(imag-tn)
+      (inst fldl (ea-for-clf-imag-stack x)))))
+
+#+long-float
+(define-move-function (store-complex-long 2) (vop x y)
+  ((complex-long-reg) (complex-long-stack))
+  (let ((real-tn (complex-long-reg-real-tn x)))
+    (cond ((zerop (tn-offset real-tn))
+	   (store-long-float (ea-for-clf-real-stack y)))
+	  (t
+	   (inst fxch real-tn)
+	   (store-long-float (ea-for-clf-real-stack y))
+	   (inst fxch real-tn))))
+  (let ((imag-tn (complex-long-reg-imag-tn x)))
+    (inst fxch imag-tn)
+    (store-long-float (ea-for-clf-imag-stack y))
+    (inst fxch imag-tn)))
+
+#+double-double
+(progn
+(define-move-function (load-complex-double-double 4) (vop x y)
+  ((complex-double-double-stack) (complex-double-double-reg))
+  (let ((real-tn (complex-double-double-reg-real-hi-tn y)))
+    (inst movsd real-tn (ea-for-cddf-real-hi-stack x)))
+  (let ((real-tn (complex-double-double-reg-real-lo-tn y)))
+    (inst movsd real-tn (ea-for-cddf-real-lo-stack x)))
+  (let ((imag-tn (complex-double-double-reg-imag-hi-tn y)))
+    (inst movsd imag-tn (ea-for-cddf-imag-hi-stack x)))
+  (let ((imag-tn (complex-double-double-reg-imag-lo-tn y)))
+    (inst movsd imag-tn (ea-for-cddf-imag-lo-stack x))))
+
+(define-move-function (store-complex-double-double 4) (vop x y)
+  ((complex-double-double-reg) (complex-double-double-stack))
+  ;; FIXME: These may not be right!!!!
+  (let ((real-tn (complex-double-double-reg-real-hi-tn x)))
+    (inst movsd (ea-for-cddf-real-hi-stack y) real-tn))
+  (let ((real-tn (complex-double-double-reg-real-lo-tn x)))
+    (inst movsd (ea-for-cddf-real-lo-stack y) real-tn))
+  (let ((imag-tn (complex-double-double-reg-imag-hi-tn x)))
+    (inst movsd (ea-for-cddf-imag-hi-stack y) imag-tn))
+  (let ((imag-tn (complex-double-double-reg-imag-lo-tn x)))
+    (inst movsd (ea-for-cddf-imag-lo-stack y) imag-tn)))
+
+)
+
+;;;; Move VOPs:
+
+;;;
+;;; Float register to register moves.
+;;;
+#+nil
+(define-vop (float-move)
+  (:args (x))
+  (:results (y))
+  (:note _N"float move")
+  (:generator 0
+     (unless (location= x y)
+       (inst movq y x))))
+
+(define-vop (float-move/single)
+  (:args (x))
+  (:results (y))
+  (:note _N"float move")
+  (:temporary (:sc single-stack) temp)
+  (:generator 0
+    (unless (location= x y)
+      (let ((x-offset (tn-offset x))
+	    (y-offset (tn-offset y)))
+	(cond ((and (zerop x-offset)
+		    (>= y-offset 8))
+	       ;; Move fr0 to xmm
+	       (inst fst (ea-for-sf-stack temp))
+	       (inst movss y (ea-for-sf-stack temp)))
+	      ((and (>= x-offset 8)
+		    (>= y-offset 8))
+	       (inst movq y x))
+	      (t
+	       (error "Don't know how to move ~S to ~S" x y)))))))
+
+(define-vop (float-move/double)
+  (:args (x))
+  (:results (y))
+  (:note _N"float move")
+  (:temporary (:sc double-stack) temp)
+  (:generator 0
+    (unless (location= x y)
+      (let ((x-offset (tn-offset x))
+	    (y-offset (tn-offset y)))
+	(cond ((and (zerop x-offset)
+		    (>= y-offset 8))
+	       ;; Move fr0 to xmm
+	       (inst fstd (ea-for-df-stack temp))
+	       (inst movsd y (ea-for-df-stack temp)))
+	      ((and (>= x-offset 8)
+		    (>= y-offset 8))
+	       (inst movq y x))
+	      (t
+	       (error "Don't know how to move ~S to ~S" x y)))))))
+
+(define-vop (single-move float-move/single)
+  (:args (x :scs (single-reg) :target y :load-if (not (location= x y))))
+  (:results (y :scs (single-reg) :load-if (not (location= x y)))))
+
+(define-move-vop single-move :move (single-reg) (single-reg))
+
+(define-vop (double-move float-move/double)
+  (:args (x :scs (double-reg) :target y :load-if (not (location= x y))))
+  (:results (y :scs (double-reg) :load-if (not (location= x y)))))
+(define-move-vop double-move :move (double-reg) (double-reg))
+
+#+long-float
+(define-vop (long-move float-move)
+  (:args (x :scs (long-reg) :target y :load-if (not (location= x y))))
+  (:results (y :scs (long-reg) :load-if (not (location= x y)))))
+#+long-float
+(define-move-vop long-move :move (long-reg) (long-reg))
+
+;;;
+;;; Complex float register to register moves.
+;;;
+(define-vop (complex-single-move)
+  (:args (x :scs (complex-single-reg) :target y
+	    :load-if (not (location= x y))))
+  (:results (y :scs (complex-single-reg) :load-if (not (location= x y))))
+  (:generator 0
+    (unless (location= x y)
+      (inst movaps y x))))
+
+(define-move-vop complex-single-move :move
+  (complex-single-reg) (complex-single-reg))
+
+(define-vop (complex-double-move)
+  (:args (x :scs (complex-double-reg)
+	    :target y :load-if (not (location= x y))))
+  (:results (y :scs (complex-double-reg) :load-if (not (location= x y))))
+  (:generator 0
+    (unless (location= x y)
+      (inst movapd y x))))
+
+(define-move-vop complex-double-move :move
+  (complex-double-reg) (complex-double-reg))
+
+    
+#+long-float
+(define-vop (complex-long-move complex-float-move)
+  (:args (x :scs (complex-long-reg)
+	    :target y :load-if (not (location= x y))))
+  (:results (y :scs (complex-long-reg) :load-if (not (location= x y)))))
+#+long-float
+(define-move-vop complex-long-move :move
+  (complex-long-reg) (complex-long-reg))
+
+
+;;;
+;;; Move from float to a descriptor reg. allocating a new float
+;;; object in the process.
+;;;
+(define-vop (move-from-single)
+  (:args (x :scs (single-reg) :to :save))
+  (:results (y :scs (descriptor-reg)))
+  (:node-var node)
+  (:note _N"float to pointer coercion")
+  (:generator 13
+     (with-fixed-allocation (y vm:single-float-type vm:single-float-size node)
+       (inst movss (ea-for-sf-desc y) x))))
+(define-move-vop move-from-single :move
+  (single-reg) (descriptor-reg))
+
+(define-vop (move-from-double)
+  (:args (x :scs (double-reg) :to :save))
+  (:results (y :scs (descriptor-reg)))
+  (:node-var node)
+  (:note _N"float to pointer coercion")
+  (:generator 13
+     (with-fixed-allocation (y vm:double-float-type vm:double-float-size node)
+       (inst movsd (ea-for-df-desc y) x))))
+(define-move-vop move-from-double :move
+  (double-reg) (descriptor-reg))
+
+#+long-float
+(define-vop (move-from-long)
+  (:args (x :scs (long-reg) :to :save))
+  (:results (y :scs (descriptor-reg)))
+  (:node-var node)
+  (:note _N"float to pointer coercion")
+  (:generator 13
+     (with-fixed-allocation (y vm:long-float-type vm:long-float-size node)
+       (with-tn at fp-top(x)
+	 (store-long-float (ea-for-lf-desc y))))))
+#+long-float
+(define-move-vop move-from-long :move
+  (long-reg) (descriptor-reg))
+
+(define-vop (move-from-fp-constant)
+  (:args (x :scs (fp-constant)))
+  (:results (y :scs (descriptor-reg)))
+  (:generator 2
+     (ecase (c::constant-value (c::tn-leaf x))
+       (0f0 (load-symbol-value y *fp-constant-0s0*))
+       #+nil
+       (1f0 (load-symbol-value y *fp-constant-1s0*))
+       (0d0 (load-symbol-value y *fp-constant-0d0*))
+       #+nil
+       (1d0 (load-symbol-value y *fp-constant-1d0*)))))
+(define-move-vop move-from-fp-constant :move
+  (fp-constant) (descriptor-reg))
+
+;;;
+;;; Move from a descriptor to a float register
+;;;
+(define-vop (move-to-single)
+  (:args (x :scs (descriptor-reg)))
+  (:results (y :scs (single-reg)))
+  (:note _N"pointer to float coercion")
+  (:generator 2
+    (inst movss y (ea-for-sf-desc x))))
+(define-move-vop move-to-single :move (descriptor-reg) (single-reg))
+
+(define-vop (move-to-double)
+  (:args (x :scs (descriptor-reg)))
+  (:results (y :scs (double-reg)))
+  (:note _N"pointer to float coercion")
+  (:generator 2
+    (inst movsd y (ea-for-df-desc x))))
+(define-move-vop move-to-double :move (descriptor-reg) (double-reg))
+
+#+long-float
+(define-vop (move-to-long)
+  (:args (x :scs (descriptor-reg)))
+  (:results (y :scs (long-reg)))
+  (:note _N"pointer to float coercion")
+  (:generator 2
+     (with-empty-tn at fp-top(y)
+       (inst fldl (ea-for-lf-desc x)))))
+#+long-float
+(define-move-vop move-to-long :move (descriptor-reg) (long-reg))
+
+
+;;;
+;;; Move from complex float to a descriptor reg. allocating a new
+;;; complex float object in the process.
+;;;
+(define-vop (move-from-complex-single)
+  (:args (x :scs (complex-single-reg) :to :save))
+  (:results (y :scs (descriptor-reg)))
+  (:node-var node)
+  (:note _N"complex float to pointer coercion")
+  (:generator 13
+     (with-fixed-allocation (y vm:complex-single-float-type
+			       vm:complex-single-float-size node)
+       (inst movlps (ea-for-csf-real-desc y) x))))
+(define-move-vop move-from-complex-single :move
+  (complex-single-reg) (descriptor-reg))
+
+(define-vop (move-from-complex-double)
+  (:args (x :scs (complex-double-reg) :to :save))
+  (:results (y :scs (descriptor-reg)))
+  (:node-var node)
+  (:note _N"complex float to pointer coercion")
+  (:generator 13
+     (with-fixed-allocation (y vm:complex-double-float-type
+			       vm:complex-double-float-size node)
+       (inst movupd (ea-for-cdf-real-desc y) x))))
+
+(define-move-vop move-from-complex-double :move
+  (complex-double-reg) (descriptor-reg))
+
+#+long-float
+(define-vop (move-from-complex-long)
+  (:args (x :scs (complex-long-reg) :to :save))
+  (:results (y :scs (descriptor-reg)))
+  (:node-var node)
+  (:note _N"complex float to pointer coercion")
+  (:generator 13
+     (with-fixed-allocation (y vm:complex-long-float-type
+			       vm:complex-long-float-size node)
+       (let ((real-tn (complex-long-reg-real-tn x)))
+	 (with-tn at fp-top(real-tn)
+	   (store-long-float (ea-for-clf-real-desc y))))
+       (let ((imag-tn (complex-long-reg-imag-tn x)))
+	 (with-tn at fp-top(imag-tn)
+	   (store-long-float (ea-for-clf-imag-desc y)))))))
+#+long-float
+(define-move-vop move-from-complex-long :move
+  (complex-long-reg) (descriptor-reg))
+
+#+double-double
+(define-vop (move-from-complex-double-double)
+  (:args (x :scs (complex-double-double-reg) :to :save))
+  (:results (y :scs (descriptor-reg)))
+  (:node-var node)
+  (:note _N"complex double-double float to pointer coercion")
+  (:generator 13
+     (with-fixed-allocation (y vm::complex-double-double-float-type
+			       vm::complex-double-double-float-size node)
+       (let ((real-tn (complex-double-double-reg-real-hi-tn x)))
+	 (inst movsd (ea-for-cddf-real-hi-desc y) real-tn))
+       (let ((real-tn (complex-double-double-reg-real-lo-tn x)))
+	 (inst movsd (ea-for-cddf-real-lo-desc y) real-tn))
+       (let ((imag-tn (complex-double-double-reg-imag-hi-tn x)))
+	 (inst movsd (ea-for-cddf-imag-hi-desc y) imag-tn))
+       (let ((imag-tn (complex-double-double-reg-imag-lo-tn x)))
+	 (inst movsd (ea-for-cddf-imag-lo-desc y) imag-tn)))))
+;;;
+#+double-double
+(define-move-vop move-from-complex-double-double :move
+  (complex-double-double-reg) (descriptor-reg))
+;;;
+;;; Move from a descriptor to a complex float register
+;;;
+(define-vop (move-to-complex-single)
+  (:args (x :scs (descriptor-reg)))
+  (:results (y :scs (complex-single-reg)))
+  (:note _N"pointer to complex float coercion")
+  (:generator 2
+    (inst movlps y (ea-for-csf-real-desc x))))
+
+(define-move-vop move-to-complex-single :move
+  (descriptor-reg) (complex-single-reg))
+
+(define-vop (move-to-complex-double)
+  (:args (x :scs (descriptor-reg)))
+  (:results (y :scs (complex-double-reg)))
+  (:note _N"pointer to complex float coercion")
+  (:generator 2
+    (inst movupd y (ea-for-cdf-real-desc x))))
+
+(define-move-vop move-to-complex-double :move
+  (descriptor-reg) (complex-double-reg))
+
+
+;;;
+;;; The move argument vops.
+;;;
+;;; Note these are also used to stuff fp numbers onto the c-call stack
+;;; so the order is different than the lisp-stack.
+
+;;; The general move-argument vop
+(macrolet ((frob (name sc stack-sc format)
+	     `(progn
+		(define-vop (,name)
+		  (:args (x :scs (,sc) :target y)
+			 (fp :scs (any-reg)
+			     :load-if (not (sc-is y ,sc))))
+		  (:results (y))
+		  (:note _N"float argument move")
+		  (:generator ,(case format (:single 2) (:double 3) (:long 4))
+		    (sc-case y
+		      (,sc
+		       (unless (location= x y)
+			 (inst movq y x)))
+		      (,stack-sc
+		       (if (= (tn-offset fp) esp-offset)
+			   (let* ((offset (* (tn-offset y) word-bytes))
+				  (ea (make-ea :dword :base fp :disp offset)))
+			     ,@(ecase format
+				      (:single '((inst movss ea x)))
+				      (:double '((inst movsd ea x)))))
+			   (let ((ea (make-ea
+				      :dword :base fp
+				      :disp (- (* (+ (tn-offset y)
+						     ,(case format
+							    (:single 1)
+							    (:double 2)
+							    (:long 3)))
+						  vm:word-bytes)))))
+			     ,@(ecase format 
+				      (:single '((inst movss ea x)))
+				      (:double '((inst movsd ea x))))))))))
+		(define-move-vop ,name :move-argument
+		  (,sc descriptor-reg) (,sc)))))
+  (frob move-single-float-argument single-reg single-stack :single)
+  (frob move-double-float-argument double-reg double-stack :double))
+
+;;;; Complex float move-argument vop
+(define-vop (move-complex-single-float-argument)
+  (:args (x :scs (complex-single-reg) :target y)
+	 (fp :scs (any-reg)
+	     :load-if (not (sc-is y complex-single-reg))))
+  (:results (y))
+  (:note _N"complex float argument move")
+  (:generator 3
+    (sc-case y
+      (complex-single-reg
+       (unless (location= x y)
+	 (inst movaps y x)))
+      (complex-single-stack
+       (inst movlps (ea-for-csf-real-stack y fp) x)))))
+
+(define-move-vop move-complex-single-float-argument :move-argument
+  (complex-single-reg descriptor-reg) (complex-single-reg))
+
+(define-vop (move-complex-double-float-argument)
+  (:args (x :scs (complex-double-reg) :target y)
+	 (fp :scs (any-reg)
+	     :load-if (not (sc-is y complex-double-reg))))
+  (:results (y))
+  (:note _N"complex float argument move")
+  (:generator 3
+    (sc-case y
+      (complex-double-reg
+       (unless (location= x y)
+	 (inst movapd y x)))
+      (complex-double-stack
+       (inst movupd (ea-for-cdf-real-stack y fp) x)))))
+
+(define-move-vop move-complex-double-float-argument :move-argument
+  (complex-double-reg descriptor-reg) (complex-double-reg))
+
+#+double-double
+(define-vop (move-complex-double-double-float-argument)
+  (:args (x :scs (complex-double-double-reg) :target y)
+	 (fp :scs (any-reg) :load-if (not (sc-is y complex-double-double-reg))))
+  (:results (y))
+  (:note _N"complex double-double-float argument move")
+  (:generator 2
+    (sc-case y
+      (complex-double-double-reg
+       (unless (location= x y)
+	 (let ((x-real (complex-double-double-reg-real-hi-tn x))
+	       (y-real (complex-double-double-reg-real-hi-tn y)))
+	   (inst movsd y-real x-real))
+	 (let ((x-real (complex-double-double-reg-real-lo-tn x))
+	       (y-real (complex-double-double-reg-real-lo-tn y)))
+	   (inst movsd y-real x-real))
+	 (let ((x-imag (complex-double-double-reg-imag-hi-tn x))
+	       (y-imag (complex-double-double-reg-imag-hi-tn y)))
+	   (inst movsd y-imag x-imag))
+	 (let ((x-imag (complex-double-double-reg-imag-lo-tn x))
+	       (y-imag (complex-double-double-reg-imag-lo-tn y)))
+	   (inst movsd y-imag x-imag))))
+      (complex-double-double-stack
+       (let ((real-tn (complex-double-double-reg-real-hi-tn x)))
+	 (inst movsd (ea-for-cddf-real-hi-stack y fp) real-tn))
+       (let ((real-tn (complex-double-double-reg-real-lo-tn x)))
+	 (inst movsd (ea-for-cddf-real-lo-stack y fp) real-tn))
+       (let ((imag-tn (complex-double-double-reg-imag-hi-tn x)))
+	 (inst movsd (ea-for-cddf-imag-hi-stack y fp) imag-tn))
+       (let ((imag-tn (complex-double-double-reg-imag-lo-tn x)))
+	 (inst movsd (ea-for-cddf-imag-lo-stack y fp) imag-tn))))
+    ))
+
+#+double-double
+(define-move-vop move-complex-double-double-float-argument :move-argument
+  (complex-double-double-reg descriptor-reg) (complex-double-double-reg))
+
+(define-move-vop move-argument :move-argument
+  (single-reg double-reg #+long-float long-reg
+   #+double-double double-double-reg
+   complex-single-reg complex-double-reg #+long-float complex-long-reg
+   #+double-double complex-double-double-reg)
+  (descriptor-reg))
+
+
+;;;; Arithmetic VOPs:
+
+
+;;; dtc: The floating point arithmetic vops.
+;;; 
+;;; Note: Although these can accept x and y on the stack or pointed to
+;;; from a descriptor register, they will work with register loading
+;;; without these.  Same deal with the result - it need only be a
+;;; register.  When load-tns are needed they will probably be in ST0
+;;; and the code below should be able to correctly handle all cases.
+;;;
+;;; However it seems to produce better code if all arg. and result
+;;; options are used; on the P86 there is no extra cost in using a
+;;; memory operand to the FP instructions - not so on the PPro.
+;;;
+;;; It may also be useful to handle constant args?
+;;;
+;;; 22-Jul-97: descriptor args lose in some simple cases when
+;;; a function result computed in a loop. Then Python insists
+;;; on consing the intermediate values! For example
+#|
+(defun test(a n)
+  (declare (type (simple-array double-float (*)) a)
+	   (fixnum n))
+  (let ((sum 0d0))
+    (declare (type double-float sum))
+  (dotimes (i n)
+    (incf sum (* (aref a i)(aref a i))))
+    sum))
+|#
+;;; So, disabling descriptor args until this can be fixed elsewhere.
+;;;
+
+(define-vop (float-op)
+  (:args (x) (y))
+  (:results (r))
+  (:policy :fast-safe)
+  (:note _N"inline float arithmetic")
+  (:vop-var vop)
+  (:save-p :compute-only))
+
+(macrolet ((frob (name sc ptype)
+             `(define-vop (,name float-op)
+                (:args (x :scs (,sc) :target r)
+                       (y :scs (,sc)))
+                (:results (r :scs (,sc)))
+                (:arg-types ,ptype ,ptype)
+                (:result-types ,ptype))))
+  (frob single-float-op single-reg single-float)
+  (frob double-float-op double-reg double-float))
+
+(macrolet ((generate (movinst opinst commutative arg-type)
+	     (multiple-value-bind (rtype stack-sc ea ea-stack)
+		 (if (eq arg-type 'single)
+		     (values 'single-reg 'single-stack 'ea-for-sf-desc 'ea-for-sf-stack)
+		     (values 'double-reg 'double-stack 'ea-for-df-desc 'ea-for-df-stack))
+	       `(progn
+		  (cond
+		    ((location= x r)
+		     ;; x and r are the same.  We can just operate on x,
+		     ;; and we're done.
+		     (sc-case y
+		       (,rtype
+			(inst ,opinst x y))
+		       (descriptor-reg
+			(inst ,opinst x (,ea y)))
+		       (,stack-sc
+			(inst ,opinst x (,ea-stack y)))))
+		    ((and ,commutative (location= y r))
+		     ;; y = r and the operation is commutative, so just
+		     ;; do the operation with r and x.
+		     (inst ,opinst y x))
+		    ((not (location= r y))
+		     ;; x, y, and r are three different regs.  So just
+		     ;; move r to x and do the operation on r.
+		     (inst ,movinst r x)
+		     (sc-case y
+		       (,rtype
+			(inst ,opinst r y))
+		       (descriptor-reg
+			(inst ,opinst r (,ea y)))
+		       (,stack-sc
+			(inst, opinst r (,ea-stack y)))))
+		    (t
+		     ;; The hard case where the operation is not
+		     ;; commutative, but y might be r.  Don't want to
+		     ;; destroy y in this case, so use a temp so we
+		     ;; don't accidentally overwrite y.
+		     (inst ,movinst tmp x)
+		     (sc-case y
+		       (,rtype
+			(inst ,opinst tmp y))
+		       (descriptor-reg
+			(inst ,opinst tmp (,ea y)))
+		       (,stack-sc
+			(inst, opinst tmp (,ea-stack y))))
+		     (inst ,movinst r tmp))))))
+           (frob (op sinst sname scost dinst dname dcost commutative)
+             `(progn
+                (define-vop (,sname single-float-op)
+		  (:args (x :scs (single-reg) :target r)
+			 (y :scs (single-reg descriptor-reg)
+			    :load-if (not (sc-is y single-stack))))
+		  (:translate ,op)
+                  (:temporary (:sc single-reg) tmp)
+                  (:generator ,scost
+                    (generate movss ,sinst ,commutative single)))
+                (define-vop (,dname double-float-op)
+		  (:args (x :scs (double-reg) :target r)
+			 (y :scs (double-reg descriptor-reg)
+			    :load-if (not (sc-is y double-stack))))
+                  (:translate ,op)
+                  (:temporary (:sc double-reg) tmp)
+                  (:generator ,dcost
+                    (generate movsd ,dinst ,commutative double))))))
+  (frob + addss +/single-float 2 addsd +/double-float 2 t)
+  (frob - subss -/single-float 2 subsd -/double-float 2 nil)
+  (frob * mulss */single-float 4 mulsd */double-float 5 t)
+  (frob / divss //single-float 12 divsd //double-float 19 nil))
+
+(define-vop (fsqrt)
+  (:args (x :scs (double-reg)))
+  (:results (y :scs (double-reg)))
+  (:translate %sqrt)
+  (:policy :fast-safe)
+  (:arg-types double-float)
+  (:result-types double-float)
+  (:note _N"inline float arithmetic")
+  (:vop-var vop)
+  (:save-p :compute-only)
+  (:generator 1
+     (note-this-location vop :internal-error)
+     (inst sqrtsd y x)))
+
+(macrolet ((frob ((name translate mov sc type) &body body)
+             `(define-vop (,name)
+	        (:args (x :scs (,sc)))
+                (:results (y :scs (,sc)))
+                (:translate ,translate)
+                (:policy :fast-safe)
+                (:arg-types ,type)
+                (:result-types ,type)
+                (:temporary (:sc ,sc) tmp)
+                (:note _N"inline float arithmetic")
+                (:vop-var vop)
+                (:save-p :compute-only)
+                (:generator 1
+		  (note-this-location vop :internal-error)
+		  (inst pcmpeqd tmp tmp)		; all 1's
+		  ;; we should be able to do this better.  what we
+		  ;; really would like to do is use the target as the
+		  ;; temp whenever it's not also the source
+		  (unless (location= x y)
+		    (inst ,mov y x))
+		  , at body))))
+  (frob (%negate/double-float %negate movsd double-reg double-float)
+	(inst psllq tmp 63)		; tmp = #x8000000000000000
+	(inst xorpd y tmp))
+  (frob (%negate/single-float %negate movss single-reg single-float)
+	(inst pslld tmp 31)		; tmp = #x80000000
+	(inst xorps y tmp))
+  (frob (abs/double-float abs  movsd double-reg double-float)
+	(inst psrlq tmp 1)		; tmp = #x7fffffffffffffff
+	(inst andpd y tmp))
+  (frob (abs/single-float abs movss single-reg single-float)
+	(inst psrld tmp 1)		; tmp = #x7fffffff
+	(inst andps y tmp)))
+
+
+;;;; Comparison:
+
+#+long-float
+(deftransform eql ((x y) (long-float long-float))
+  `(and (= (long-float-low-bits x) (long-float-low-bits y))
+	(= (long-float-high-bits x) (long-float-high-bits y))
+	(= (long-float-exp-bits x) (long-float-exp-bits y))))
+
+#+double-double
+(deftransform eql ((x y) (double-double-float double-double-float))
+  '(and (eql (double-double-hi x) (double-double-hi y))
+	(eql (double-double-lo x) (double-double-lo y))))
+
+
+;;;; comparison
+
+(define-vop (float-compare)
+  (:conditional)
+  (:info target not-p)
+  (:policy :fast-safe)
+  (:vop-var vop)
+  (:save-p :compute-only)
+  (:note _N"inline float comparison"))
+
+;;; comiss and comisd can cope with one or other arg in memory: we
+;;; could (should, indeed) extend these to cope with descriptor args
+;;; and stack args
+
+(define-vop (single-float-compare float-compare)
+  (:args (x :scs (single-reg)) (y :scs (single-reg descriptor-reg)))
+  (:conditional)
+  (:arg-types single-float single-float))
+(define-vop (double-float-compare float-compare)
+  (:args (x :scs (double-reg)) (y :scs (double-reg descriptor-reg)))
+  (:conditional)
+  (:arg-types double-float double-float))
+
+(define-vop (=/single-float single-float-compare)
+    (:translate =)
+  (:info target not-p)
+  (:vop-var vop)
+  (:generator 3
+    (note-this-location vop :internal-error)
+    (sc-case y
+      (single-reg
+       (inst ucomiss x y))
+      (descriptor-reg
+       (inst ucomiss x (ea-for-sf-desc y))))
+    ;; if PF&CF, there was a NaN involved => not equal
+    ;; otherwise, ZF => equal
+    (cond (not-p
+           (inst jmp :p target)
+           (inst jmp :ne target))
+          (t
+           (let ((not-lab (gen-label)))
+             (inst jmp :p not-lab)
+             (inst jmp :e target)
+             (emit-label not-lab))))))
+
+(define-vop (=/double-float double-float-compare)
+    (:translate =)
+  (:info target not-p)
+  (:vop-var vop)
+  (:generator 3
+    (note-this-location vop :internal-error)
+    (sc-case y
+      (double-reg
+       (inst ucomisd x y))
+      (descriptor-reg
+       (inst ucomisd x (ea-for-df-desc y))))
+    (cond (not-p
+           (inst jmp :p target)
+           (inst jmp :ne target))
+          (t
+           (let ((not-lab (gen-label)))
+             (inst jmp :p not-lab)
+             (inst jmp :e target)
+             (emit-label not-lab))))))
+
+(define-vop (</double-float double-float-compare)
+  (:translate <)
+  (:info target not-p)
+  (:generator 3
+    (sc-case y
+      (double-reg
+       (inst comisd x y))
+      (descriptor-reg
+       (inst comisd x (ea-for-df-desc y))))
+    (cond (not-p
+           (inst jmp :p target)
+           (inst jmp :nc target))
+          (t
+           (let ((not-lab (gen-label)))
+             (inst jmp :p not-lab)
+             (inst jmp :c target)
+             (emit-label not-lab))))))
+
+(define-vop (</single-float single-float-compare)
+  (:translate <)
+  (:info target not-p)
+  (:generator 3
+    (sc-case y
+      (single-reg
+       (inst comiss x y))
+      (descriptor-reg
+       (inst comiss x (ea-for-sf-desc y))))
+    (cond (not-p
+           (inst jmp :p target)
+           (inst jmp :nc target))
+          (t
+           (let ((not-lab (gen-label)))
+             (inst jmp :p not-lab)
+             (inst jmp :c target)
+             (emit-label not-lab))))))
+
+(define-vop (>/double-float double-float-compare)
+  (:translate >)
+  (:info target not-p)
+  (:generator 3
+    (sc-case y
+      (double-reg
+       (inst comisd x y))
+      (descriptor-reg
+       (inst comisd x (ea-for-df-desc y))))
+    (cond (not-p
+           (inst jmp :p target)
+           (inst jmp :na target))
+          (t
+           (let ((not-lab (gen-label)))
+             (inst jmp :p not-lab)
+             (inst jmp :a target)
+             (emit-label not-lab))))))
+
+(define-vop (>/single-float single-float-compare)
+  (:translate >)
+  (:info target not-p)
+  (:generator 3
+    (sc-case y
+      (single-reg
+       (inst comiss x y))
+      (descriptor-reg
+       (inst comiss x (ea-for-sf-desc y))))
+    (cond (not-p
+           (inst jmp :p target)
+           (inst jmp :na target))
+          (t
+           (let ((not-lab (gen-label)))
+             (inst jmp :p not-lab)
+             (inst jmp :a target)
+             (emit-label not-lab))))))
+
+
+
+;;;; Conversion:
+
+(macrolet ((frob (name translate inst to-sc to-type)
+             `(define-vop (,name)
+                (:args (x :scs (signed-stack signed-reg)))
+                (:results (y :scs (,to-sc)))
+                (:arg-types signed-num)
+                (:result-types ,to-type)
+                (:policy :fast-safe)
+                (:note _N"inline float coercion")
+                (:translate ,translate)
+                (:vop-var vop)
+                (:save-p :compute-only)
+                (:generator 5
+                  (sc-case x
+                    (signed-reg
+                     (note-this-location vop :internal-error)
+                     (inst ,inst y x))
+                    (signed-stack
+                     (note-this-location vop :internal-error)
+                     (inst ,inst y x)))))))
+  (frob %single-float/signed %single-float cvtsi2ss single-reg single-float)
+  (frob %double-float/signed %double-float cvtsi2sd double-reg double-float))
+
+(macrolet ((frob (name translate inst from-sc from-type to-sc to-type)
+	     (let ((ea (if (eq from-sc 'single-reg)
+			   'ea-for-sf-desc
+			   'ea-for-df-desc)))
+	       `(define-vop (,name)
+		  (:args (x :scs (,from-sc descriptor-reg) :target y))
+		  (:results (y :scs (,to-sc)))
+		  (:arg-types ,from-type)
+		  (:result-types ,to-type)
+		  (:policy :fast-safe)
+		  (:note _N"inline float coercion")
+		  (:translate ,translate)
+		  (:vop-var vop)
+		  (:save-p :compute-only)
+		  (:generator 2
+		    (note-this-location vop :internal-error)
+		    (sc-case x
+		      (,from-sc
+		       (inst ,inst y x))
+		      (descriptor-reg
+		       (inst ,inst y (,ea x)))))))))
+  (frob %single-float/double-float %single-float cvtsd2ss double-reg
+	double-float single-reg single-float)
+
+  (frob %double-float/single-float %double-float cvtss2sd
+	single-reg single-float double-reg double-float))
+
+(macrolet ((frob (trans inst from-sc from-type round-p)
+             (declare (ignore round-p))
+	     (let ((ea (if (eq from-sc 'single-reg)
+			   'ea-for-sf-desc
+			   'ea-for-df-desc)))
+	       `(define-vop (,(symbolicate trans "/" from-type))
+		  (:args (x :scs (,from-sc descriptor-reg)))
+		  (:temporary (:sc any-reg) temp-reg)
+		  (:results (y :scs (signed-reg)))
+		  (:arg-types ,from-type)
+		  (:result-types signed-num)
+		  (:translate ,trans)
+		  (:policy :fast-safe)
+		  (:note _N"inline float truncate")
+		  (:vop-var vop)
+		  (:save-p :compute-only)
+		  (:generator 5
+		    (sc-case y
+		      (signed-stack
+		       (sc-case x
+			 (,from-sc
+			  (inst ,inst temp-reg x))
+			 (descriptor-reg
+			  (inst ,inst temp-reg (,ea x))))
+		       (move y temp-reg))
+		      (signed-reg
+		       (sc-case x
+			 (,from-sc
+			  (inst ,inst y x))
+			 (descriptor-reg
+			  (inst ,inst y (,ea x)))))))))))
+  (frob %unary-truncate cvttss2si single-reg single-float nil)
+  (frob %unary-truncate cvttsd2si double-reg double-float nil)
+
+  (frob %unary-round cvtss2si single-reg single-float t)
+  (frob %unary-round cvtsd2si double-reg double-float t))
+
+(define-vop (fast-unary-ftruncate/single-float)
+  (:args (x :scs (single-reg descriptor-reg)))
+  (:arg-types single-float)
+  (:results (r :scs (single-reg)))
+  (:result-types single-float)
+  (:policy :fast-safe)
+  (:translate c::fast-unary-ftruncate)
+  (:temporary (:sc signed-reg) temp)
+  (:note _N"inline ftruncate")
+  (:generator 2
+    (sc-case x
+      (single-reg
+       (inst cvttss2si temp x))
+      (descriptor-reg
+       (inst cvttss2si temp (ea-for-sf-desc x))))
+    (inst cvtsi2ss r temp)))
+
+(define-vop (fast-unary-ftruncate/double-float)
+  (:args (x :scs (double-reg descriptor-reg) :target r))
+  (:arg-types double-float)
+  (:results (r :scs (double-reg)))
+  (:result-types double-float)
+  (:policy :fast-safe)
+  (:translate c::fast-unary-ftruncate)
+  (:temporary (:sc signed-reg) temp)
+  (:note _N"inline ftruncate")
+  (:generator 2
+    (sc-case x
+      (double-reg
+       (inst cvttsd2si temp x))
+      (descriptor-reg
+       (inst cvttsd2si temp (ea-for-df-desc x))))
+    (inst cvtsi2sd r temp)))
+
+(define-vop (make-single-float)
+  (:args (bits :scs (signed-reg) :target res
+               :load-if (not (or (and (sc-is bits signed-stack)
+                                      (sc-is res single-reg))
+                                 (and (sc-is bits signed-stack)
+                                      (sc-is res single-stack)
+                                      (location= bits res))))))
+  (:results (res :scs (single-reg single-stack)))
+  (:arg-types signed-num)
+  (:result-types single-float)
+  (:translate make-single-float)
+  (:policy :fast-safe)
+  (:vop-var vop)
+  (:generator 4
+    (sc-case res
+       (single-stack
+        (sc-case bits
+          (signed-reg
+           (inst mov res bits))
+          (signed-stack
+           (assert (location= bits res)))))
+       (single-reg
+        (sc-case bits
+          (signed-reg
+           (inst movd res bits))
+          (signed-stack
+           (inst movd res bits)))))))
+
+(define-vop (make-double-float)
+  (:args (hi-bits :scs (signed-reg))
+	 (lo-bits :scs (unsigned-reg)))
+  (:results (res :scs (double-reg)))
+  (:arg-types signed-num unsigned-num)
+  (:result-types double-float)
+  (:translate make-double-float)
+  (:temporary (:sc double-reg) temp)
+  (:policy :fast-safe)
+  (:vop-var vop)
+  (:generator 4
+    (inst movd temp hi-bits)
+    (inst psllq temp 32)
+    (inst movd res lo-bits)
+    (inst orpd res temp)))
+
+(define-vop (single-float-bits)
+  (:args (float :scs (single-reg descriptor-reg)
+                :load-if (not (sc-is float single-stack))))
+  (:results (bits :scs (signed-reg)))
+  (:arg-types single-float)
+  (:result-types signed-num)
+  (:translate single-float-bits)
+  (:policy :fast-safe)
+  (:vop-var vop)
+  (:generator 4
+    (sc-case bits
+      (signed-reg
+       (sc-case float
+         (single-reg
+	  (inst movd bits float))
+         (single-stack
+          (move bits float))
+         (descriptor-reg
+	  (loadw
+	   bits float vm:single-float-value-slot vm:other-pointer-type))))
+      (signed-stack
+       (sc-case float
+         (single-reg
+          (inst movss bits float)))))))
+
+(define-vop (double-float-high-bits)
+  (:args (float :scs (double-reg descriptor-reg)
+                :load-if (not (sc-is float double-stack))))
+  (:results (hi-bits :scs (signed-reg)))
+  (:temporary (:sc double-reg) temp)
+  (:arg-types double-float)
+  (:result-types signed-num)
+  (:translate double-float-high-bits)
+  (:policy :fast-safe)
+  (:vop-var vop)
+  (:generator 5
+     (sc-case float
+       (double-reg
+	(inst movq temp float)
+	(inst psrlq temp 32)
+	(inst movd hi-bits temp))
+       (double-stack
+        (loadw hi-bits ebp-tn (- (1+ (tn-offset float)))))
+       (descriptor-reg
+        (loadw hi-bits float (1+ double-float-value-slot)
+               vm:other-pointer-type)))))
+
+(define-vop (double-float-low-bits)
+  (:args (float :scs (double-reg descriptor-reg)
+                :load-if (not (sc-is float double-stack))))
+  (:results (lo-bits :scs (unsigned-reg)))
+  (:arg-types double-float)
+  (:result-types unsigned-num)
+  (:translate double-float-low-bits)
+  (:policy :fast-safe)
+  (:vop-var vop)
+  (:generator 5
+     (sc-case float
+       (double-reg
+	(inst movd lo-bits float))
+       (double-stack
+        (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float)))))
+       (descriptor-reg
+        (loadw lo-bits float vm:double-float-value-slot
+	       vm:other-pointer-type)))))
+
+(define-vop (double-float-bits)
+  (:args (float :scs (double-reg descriptor-reg)
+		:load-if (not (sc-is float double-stack))
+		:to (:result 1)))
+  (:results (hi-bits :scs (signed-reg))
+	    (lo-bits :scs (unsigned-reg)))
+  (:arg-types double-float)
+  (:result-types signed-num unsigned-num)
+  (:temporary (:sc double-reg) temp)
+  (:translate kernel::double-float-bits)
+  (:policy :fast-safe)
+  (:vop-var vop)
+  (:generator 5
+    (sc-case float
+      (double-reg
+        (inst movq temp float)
+	(inst movd lo-bits temp)
+	(inst psrlq temp 32)
+	(inst movd hi-bits temp))
+      (double-stack
+       (loadw hi-bits ebp-tn (- (+ 1 (tn-offset float))))
+       (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float)))))
+      (descriptor-reg
+       (loadw hi-bits float (1+ double-float-value-slot)
+	   vm:other-pointer-type)
+       (loadw lo-bits float vm:double-float-value-slot
+	       vm:other-pointer-type)))))
+
+
+;;;; Float mode hackery:
+
+(deftype float-modes () '(unsigned-byte 24))
+
+;; For the record, here is the format of the MXCSR register.
+;;
+;; Bit
+;; 31-16      Reserved
+;; 15         Flush to zero
+;; 14-13      Rounding control
+;; 12         precision mask (inexact)
+;; 11         underflow mask
+;; 10         overflow mask
+;;  9         divide-by-zero mask
+;;  8         denormal operation mask
+;;  7         invalid operation mask
+;;  6         denormals-are-zeros
+;;  5         precision flag (inexact)
+;;  4         underflow flag
+;;  3         overflow flag
+;;  2         divide-by-zero flag
+;;  1         denormal operation flag
+;;  0         invalid operation flag
+;;
+;; See below for rounding control
+(defknown sse2-floating-point-modes () float-modes (flushable))
+(defknown ((setf sse2-floating-point-modes)) (float-modes) float-modes)
+
+;; Returns exactly the mxcsr register, except the masks are flipped
+;; because we want exception enable flags, not masks.
+(define-vop (sse2-floating-point-modes)
+  (:results (result :scs (unsigned-reg)))
+  (:result-types unsigned-num)
+  (:translate sse2-floating-point-modes)
+  (:policy :fast-safe)
+  (:temporary (:sc unsigned-stack) temp)
+  (:generator 3
+    (inst stmxcsr temp)
+    (inst mov result temp)
+    (inst xor result (ash #x3f 7))))
+
+;; Set mxcsr exactly to whatever is given, except we invert the
+;; exception enable flags to make them match the exception mask flags.
+(define-vop (set-sse2-floating-point-modes)
+  (:args (new :scs (unsigned-reg) :to :result :target res))
+  (:arg-types unsigned-num)
+  (:results (res :scs (unsigned-reg)))
+  (:result-types unsigned-num)
+  (:translate (setf sse2-floating-point-modes))
+  (:policy :fast-safe)
+  (:temporary (:sc unsigned-stack) cw-stack)
+  (:temporary (:sc unsigned-reg) temp)
+  (:generator 8
+    ;; The high 16 bits are reserved and will cause a segfault if set,
+    ;; so clear out those bits.
+    (inst mov temp new)
+    (inst and temp #xffff)
+    (inst xor temp (ash #x3f 7))	; Convert enables to masks
+    (inst mov cw-stack temp)
+    (inst ldmxcsr cw-stack)
+    (move res new)))
+
+;; For the record here is the format of the x87 control and status
+;; words:
+;;
+;; Status word:
+;;
+;; Bit
+;; 15         FPU Busy
+;; 14         Condition code C3
+;; 13-11      top of stack
+;; 10         Condition code C2
+;;  9         Condition code C1
+;;  8         Condition code C0
+;;  7         Error summary status
+;;  6         Stack fault
+;;  5         precision flag (inexact)
+;;  4         underflow flag
+;;  3         overflow flag
+;;  2         divide-by-zero flag
+;;  1         denormal operation flag
+;;  0         invalid operation flag
+;;
+;; Control word
+;;
+;; Bit
+;; 15-13      Reserved
+;; 12         Infinity control
+;; 11-10      Rounding control
+;;  9-8       Precision control
+;;  7-6       Reserved
+;;  5         precision mask (inexact)
+;;  4         underflow mask
+;;  3         overflow mask
+;;  2         divide-by-zero mask
+;;  1         denormal operation mask
+;;  0         invalid operation mask
+;;
+;; Round control:
+;;
+;; 00   nearest
+;; 01   negative infinity
+;; 10   positive infinity
+;; 11   zero (truncate)
+;;
+;; Precision control
+;;
+;; 00   single precision (24 bits)
+;; 01   reserved
+;; 10   double precision (53 bits)
+;; 11   double extended precision (64 bits)
+
+(defknown x87-floating-point-modes () float-modes (flushable))
+(defknown ((setf x87-floating-point-modes)) (float-modes)
+  float-modes)
+
+;; Extract the control and status words from the FPU.  The low 16 bits
+;; contain the control word, and the high 16 bits contain the status.
+(define-vop (x87-floating-point-modes)
+  (:results (res :scs (unsigned-reg)))
+  (:result-types unsigned-num)
+  (:translate x87-floating-point-modes)
+  (:policy :fast-safe)
+  (:temporary (:sc unsigned-stack) cw-stack)
+  (:temporary (:sc unsigned-reg :offset eax-offset) sw-reg)
+  (:generator 8
+   (inst fnstsw)
+   (inst fnstcw cw-stack)
+   (inst and sw-reg #xff)		; mask exception flags
+   (inst shl sw-reg 16)
+   (inst byte #x66)			; operand size prefix
+   (inst or sw-reg cw-stack)
+   (inst xor sw-reg #x3f)		; invert exception mask
+   (move res sw-reg)))
+
+;; Set the control and status words from the FPU.  The low 16 bits
+;; contain the control word, and the high 16 bits contain the status.
+(define-vop (x87-set-floating-point-modes)
+  (:args (new :scs (unsigned-reg) :to :result :target res))
+  (:results (res :scs (unsigned-reg)))
+  (:arg-types unsigned-num)
+  (:result-types unsigned-num)
+  (:translate (setf x87-floating-point-modes))
+  (:policy :fast-safe)
+  (:temporary (:sc unsigned-stack) cw-stack)
+  (:temporary (:sc byte-reg :offset al-offset) sw-reg)
+  (:temporary (:sc unsigned-reg :offset ecx-offset) old)
+  (:generator 6
+   (inst mov cw-stack new)
+   (inst xor cw-stack #x3f)  ; invert exception mask
+   (inst fnstsw)
+   (inst fldcw cw-stack)  ; always update the control word
+   (inst mov old new)
+   (inst shr old 16)
+   (inst cmp cl-tn sw-reg)  ; compare exception flags
+   (inst jmp :z DONE)  ; skip updating the status word
+   (inst sub esp-tn 28)
+   (inst fstenv (make-ea :dword :base esp-tn))
+   (inst mov (make-ea :byte :base esp-tn :disp 4) cl-tn)
+   (inst fldenv (make-ea :dword :base esp-tn))
+   (inst add esp-tn 28)
+   DONE
+   (move res new)))
+
+
+(defun sse2-floating-point-modes ()
+  (sse2-floating-point-modes))
+(defun (setf sse2-floating-point-modes) (new)
+  (setf (sse2-floating-point-modes) new))
+
+(defun x87-floating-point-modes ()
+  (x87-floating-point-modes))
+(defun (setf x87-floating-point-modes) (new)
+  (setf (x87-floating-point-modes) new))
+
+
+;;;; Complex float VOPs
+(define-vop (make-complex-single-float)
+  (:translate complex)
+  (:args (real :scs (single-reg) :to :save)
+	 (imag :scs (single-reg) :to :save))
+  (:arg-types single-float single-float)
+  (:results (r :scs (complex-single-reg) :from (:argument 0)
+	       :load-if (not (sc-is r complex-single-stack))))
+  (:result-types complex-single-float)
+  (:temporary (:sc complex-single-reg) temp)
+  (:note _N"inline complex single-float creation")
+  (:policy :fast-safe)
+  (:generator 5
+    (sc-case r
+      (complex-single-reg
+       ;; x = a + b*i = b|a
+       (inst xorps temp temp)		; temp = 0|0|0|0
+       (inst movss temp real)		; temp = 0|0|0|a
+       (inst unpcklps temp imag)	; temp = 0|0|b|a
+       (inst movaps r temp))
+      (complex-single-stack
+       (inst movss (ea-for-csf-real-stack r) real)
+       (inst movss (ea-for-csf-imag-stack r) imag)))))
+
+(define-vop (make-complex-double-float)
+  (:translate complex)
+  (:args (real :scs (double-reg) :to :save)
+	 (imag :scs (double-reg) :to :save))
+  (:arg-types double-float double-float)
+  (:results (r :scs (complex-double-reg) :from (:argument 0)
+	       :load-if (not (sc-is r complex-double-stack))))
+  (:result-types complex-double-float)
+  (:temporary (:sc complex-double-reg) temp)
+  (:note _N"inline complex double-float creation")
+  (:policy :fast-safe)
+  (:generator 5
+    (sc-case r
+      (complex-double-reg
+       ;; x = a + b*i = b|a
+       (inst movsd temp real)		; temp = ?|a
+       (inst unpcklpd temp imag)	; temp = b|a
+       (inst movapd r temp))
+      (complex-double-stack
+       (inst movsd (ea-for-cdf-real-stack r) real)
+       (inst movsd (ea-for-cdf-imag-stack r) imag)))))
+
+(define-vop (realpart/complex-single-float)
+  (:translate realpart)
+  (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg)))
+  (:arg-types complex-single-float)
+  (:results (r :scs (single-reg)))
+  (:result-types single-float)
+  (:temporary (:sc single-reg) temp)
+  (:policy :fast-safe)
+  (:note _N"complex float realpart")
+  (:generator 3
+    (sc-case x
+      (complex-single-reg
+       (cond ((location= r x)
+	      (inst xorps temp temp)	; temp = 0|0|0|0
+	      (inst movss temp x)	; temp = 0|0|0|x
+	      (inst movss r temp))	; r = temp
+	     (t
+	      (inst xorps r r)		; temp = 0|0|0|0
+	      (inst movss r x))))	; r = 0|0|0|x
+      (complex-single-stack
+       (inst movss r (ea-for-csf-real-stack x)))
+      (descriptor-reg
+       (inst movss r (ea-for-csf-real-desc x))))))
+
+(define-vop (realpart/complex-double-float)
+  (:translate realpart)
+  (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg)))
+  (:arg-types complex-double-float)
+  (:results (r :scs (double-reg)))
+  (:result-types double-float)
+  (:temporary (:sc double-reg) temp)
+  (:policy :fast-safe)
+  (:note "complex float realpart")
+  (:generator 3
+    (sc-case x
+      (complex-double-reg
+       (cond ((location= r x)
+	      (inst xorpd temp temp)	; temp = 0|0
+	      (inst movsd temp x)	; temp = 0|x
+	      (inst movsd r temp))	; r = temp
+	     (t
+	      (inst xorpd r r)		; r = 0|0
+	      (inst movsd r x))))	; r = 0|x
+      (complex-double-stack
+       (inst movsd r (ea-for-cdf-real-stack x)))
+      (descriptor-reg
+       (inst movsd r (ea-for-cdf-real-desc x))))))
+
+(define-vop (imagpart/complex-single-float)
+  (:translate imagpart)
+  (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg)))
+  (:arg-types complex-single-float)
+  (:results (r :scs (single-reg)))
+  (:result-types single-float)
+  (:temporary (:sc complex-single-reg) temp)
+  (:policy :fast-safe)
+  (:note _N"complex float imagpart")
+  (:generator 3
+    (sc-case x
+      (complex-single-reg
+       ;; x = a+b*i = b|a
+       ;; Get the imag part to the low part of temp.  We don't care about
+       ;; the other parts of r.
+       (inst movaps temp x)		; temp = u|u|b|a
+       (inst shufps temp x #b01)	; temp = a|a|a|b
+       (inst xorps r r)			; r = 0|0|0|0
+       (inst movss r temp)		; r = 0|0|0|b
+       )
+      (complex-single-stack
+       (inst movss r (ea-for-csf-imag-stack x)))
+      (descriptor-reg
+       (inst movss r (ea-for-csf-imag-desc x))))))
+
+(define-vop (imagpart/complex-double-float)
+  (:translate imagpart)
+  (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg)))
+  (:arg-types complex-double-float)
+  (:results (r :scs (double-reg)))
+  (:result-types double-float)
+  (:temporary (:sc complex-double-reg) temp)
+  (:policy :fast-safe)
+  (:note _N"complex float imagpart")
+  (:generator 3
+    (sc-case x
+      (complex-double-reg
+       (cond ((location= r x)
+	      (inst xorpd temp temp)	; temp = 0|0
+	      (inst movhlps temp x)	; temp = 0|b
+	      (inst movsd r temp))	; r = temp
+	     (t
+	      (inst xorpd r r)		; r = 0|0
+	      (inst movhlps r x))))	; r = 0|b
+      (complex-double-stack
+       (inst movsd r (ea-for-cdf-imag-stack x)))
+      (descriptor-reg
+       (inst movsd r (ea-for-cdf-imag-desc x))))))
+
+;;; A hack dummy VOP to bias the representation selection of its
+;;; argument towards a FP register which can help avoid consing at
+;;; inappropriate locations.
+
+(defknown double-float-reg-bias (double-float) (values))
+;;;
+(define-vop (double-float-reg-bias)
+  (:translate double-float-reg-bias)
+  (:args (x :scs (double-reg double-stack) :load-if nil))
+  (:arg-types double-float)
+  (:policy :fast-safe)
+  (:note _N"inline dummy FP register bias")
+  (:ignore x)
+  (:generator 0))
+
+(defknown single-float-reg-bias (single-float) (values))
+;;;
+(define-vop (single-float-reg-bias)
+  (:translate single-float-reg-bias)
+  (:args (x :scs (single-reg single-stack) :load-if nil))
+  (:arg-types single-float)
+  (:policy :fast-safe)
+  (:note _N"inline dummy FP register bias")
+  (:ignore x)
+  (:generator 0))
+
+;;; Support for double-double floats
+
+#+double-double
+(progn
+
+(defun double-double-reg-hi-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
+		  :offset (tn-offset x)))
+
+(defun double-double-reg-lo-tn (x)
+  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg *backend*)
+		  :offset (1+ (tn-offset x))))
+
+(define-move-function (load-double-double 4) (vop x y)
+  ((double-double-stack) (double-double-reg))
+  (let ((real-tn (double-double-reg-hi-tn y)))
+    (inst movsd real-tn (ea-for-cdf-real-stack x)))
+  (let ((imag-tn (double-double-reg-lo-tn y)))
+    (inst movsd imag-tn (ea-for-cdf-imag-stack x))))
+
+(define-move-function (store-double-double 4) (vop x y)
+  ((double-double-reg) (double-double-stack))
+  (let ((real-tn (double-double-reg-hi-tn x)))
+    (inst movsd (ea-for-cdf-real-stack y) real-tn))
+  (let ((imag-tn (double-double-reg-lo-tn x)))
+    (inst movsd (ea-for-cdf-imag-stack y) imag-tn)))
+
+;;; Double-double float register to register moves
+
+(define-vop (double-double-move)
+  (:args (x :scs (double-double-reg)
+	    :target y :load-if (not (location= x y))))
+  (:results (y :scs (double-double-reg) :load-if (not (location= x y))))
+  (:note _N"double-double float move")
+  (:generator 0
+     (unless (location= x y)
+       ;; Note the double-float-regs are aligned to every second
+       ;; float register so there is not need to worry about overlap.
+       (let ((x-hi (double-double-reg-hi-tn x))
+	     (y-hi (double-double-reg-hi-tn y)))
+	 (inst movsd y-hi x-hi)
+       (let ((x-lo (double-double-reg-lo-tn x))
+	     (y-lo (double-double-reg-lo-tn y)))
+	 (inst movsd y-lo x-lo))))))
+;;;
+(define-move-vop double-double-move :move
+  (double-double-reg) (double-double-reg))
+
+;;; Move from a complex float to a descriptor register allocating a
+;;; new complex float object in the process.
+
+(define-vop (move-from-double-double)
+  (:args (x :scs (double-double-reg) :to :save))
+  (:results (y :scs (descriptor-reg)))
+  (:node-var node)
+  (:note _N"double double float to pointer coercion")
+  (:generator 13
+     (with-fixed-allocation (y vm:double-double-float-type
+			       vm:double-double-float-size node)
+       (let ((real-tn (double-double-reg-hi-tn x)))
+	 (inst movsd (ea-for-cdf-real-desc y) real-tn))
+       (let ((imag-tn (double-double-reg-lo-tn x)))
+	 (inst movsd (ea-for-cdf-imag-desc y) imag-tn)))))
+;;;
+(define-move-vop move-from-double-double :move
+  (double-double-reg) (descriptor-reg))
+
+;;; Move from a descriptor to a double-double float register
+
+(define-vop (move-to-double-double)
+  (:args (x :scs (descriptor-reg)))
+  (:results (y :scs (double-double-reg)))
+  (:note _N"pointer to double-double-float coercion")
+  (:generator 2
+    (let ((real-tn (double-double-reg-hi-tn y)))
+      (inst movsd real-tn (ea-for-cdf-real-desc x)))
+    (let ((imag-tn (double-double-reg-lo-tn y)))
+      (inst movsd imag-tn (ea-for-cdf-imag-desc x)))))
+
+(define-move-vop move-to-double-double :move
+  (descriptor-reg) (double-double-reg))
+
+;;; double-double float move-argument vop
+
+(define-vop (move-double-double-float-argument)
+  (:args (x :scs (double-double-reg) :target y)
+	 (fp :scs (any-reg) :load-if (not (sc-is y double-double-reg))))
+  (:results (y))
+  (:note _N"double double-float argument move")
+  (:generator 2
+    (sc-case y
+      (double-double-reg
+       (unless (location= x y)
+	 (let ((x-real (double-double-reg-hi-tn x))
+	       (y-real (double-double-reg-hi-tn y)))
+	   (inst movsd y-real x-real))
+	 (let ((x-imag (double-double-reg-lo-tn x))
+	       (y-imag (double-double-reg-lo-tn y)))
+	   (inst movsd y-imag x-imag))))
+      (double-double-stack
+       (let ((hi-tn (double-double-reg-hi-tn x)))
+	 (inst movsd (ea-for-cdf-real-stack y fp) hi-tn))
+       (let ((lo-tn (double-double-reg-lo-tn x)))
+	 (inst movsd (ea-for-cdf-imag-stack y fp) lo-tn))))))
+
+(define-move-vop move-double-double-float-argument :move-argument
+  (double-double-reg descriptor-reg) (double-double-reg))
+
+
+(define-vop (move-to-complex-double-double)
+  (:args (x :scs (descriptor-reg)))
+  (:results (y :scs (complex-double-double-reg)))
+  (:note _N"pointer to complex float coercion")
+  (:generator 2
+    (let ((real-tn (complex-double-double-reg-real-hi-tn y)))
+      (inst movsd real-tn (ea-for-cddf-real-hi-desc x)))
+    (let ((real-tn (complex-double-double-reg-real-lo-tn y)))
+      (inst movsd real-tn (ea-for-cddf-real-lo-desc x)))
+    (let ((imag-tn (complex-double-double-reg-imag-hi-tn y)))
+      (inst movsd imag-tn (ea-for-cddf-imag-hi-desc x)))
+    (let ((imag-tn (complex-double-double-reg-imag-lo-tn y)))
+      (inst movsd imag-tn (ea-for-cddf-imag-lo-desc x)))))
+
+(define-move-vop move-to-complex-double-double :move
+  (descriptor-reg) (complex-double-double-reg))
+
+
+(define-vop (make/double-double-float)
+  (:args (hi :scs (double-reg) :target r
+	     :load-if (not (location= hi r)))
+	 (lo :scs (double-reg) :to :save))
+  (:results (r :scs (double-double-reg) :from (:argument 0)
+	       :load-if (not (sc-is r double-double-stack))))
+  (:arg-types double-float double-float)
+  (:result-types double-double-float)
+  (:translate kernel::%make-double-double-float)
+  (:note _N"inline double-double-float creation")
+  (:policy :fast-safe)
+  (:vop-var vop)
+  (:generator 5
+    (sc-case r
+      (double-double-reg
+       (let ((r-real (double-double-reg-hi-tn r)))
+	 (unless (location= hi r-real)
+	   (inst movsd r-real hi)))
+       (let ((r-imag (double-double-reg-lo-tn r)))
+	 (unless (location= lo r-imag)
+	   (inst movsd r-imag lo))))
+      (double-double-stack
+       (unless (location= hi r)
+	 (inst movsd (ea-for-cdf-real-stack r) hi))
+       (inst movsd (ea-for-cdf-imag-stack r) lo)))))
+
+(define-vop (double-double-value)
+  (:args (x :target r))
+  (:results (r))
+  (:variant-vars offset)
+  (:policy :fast-safe)
+  (:generator 3
+    (cond ((sc-is x double-double-reg)
+	   (let ((value-tn
+		  (make-random-tn :kind :normal
+				  :sc (sc-or-lose 'double-reg *backend*)
+				  :offset (+ offset (tn-offset x)))))
+	     (unless (location= value-tn r)
+	       (inst movsd r value-tn))))
+	  ((sc-is r double-reg)
+	   (let ((ea (sc-case x
+		       (double-double-stack
+			(ecase offset
+			  (0 (ea-for-cdf-real-stack x))
+			  (1 (ea-for-cdf-imag-stack x))))
+		       (descriptor-reg
+			(ecase offset
+			  (0 (ea-for-cdf-real-desc x))
+			  (1 (ea-for-cdf-imag-desc x)))))))
+	     (inst movsd r ea)))
+	  (t (error "double-double-value VOP failure")))))
+
+
+(define-vop (hi/double-double-value double-double-value)
+  (:translate kernel::double-double-hi)
+  (:args (x :scs (double-double-reg double-double-stack descriptor-reg)
+	    :target r))
+  (:arg-types double-double-float)
+  (:results (r :scs (double-reg)))
+  (:result-types double-float)
+  (:note _N"double-double high part")
+  (:variant 0))
+
+(define-vop (lo/double-double-value double-double-value)
+  (:translate kernel::double-double-lo)
+  (:args (x :scs (double-double-reg double-double-stack descriptor-reg)
+	    :target r))
+  (:arg-types double-double-float)
+  (:results (r :scs (double-reg)))
+  (:result-types double-float)
+  (:note _N"double-double low part")
+  (:variant 1))
+
+(define-vop (make-complex-double-double-float)
+  (:translate complex)
+  (:args (real :scs (double-double-reg) :target r
+	       :load-if (not (location= real r))
+	       )
+	 (imag :scs (double-double-reg) :to :save))
+  (:arg-types double-double-float double-double-float)
+  (:results (r :scs (complex-double-double-reg) :from (:argument 0)
+	       :load-if (not (sc-is r complex-double-double-stack))))
+  (:result-types complex-double-double-float)
+  (:note _N"inline complex double-double-float creation")
+  (:policy :fast-safe)
+  (:generator 5
+    (sc-case r
+      (complex-double-double-reg
+       (let ((r-real (complex-double-double-reg-real-hi-tn r))
+	     (a-real (double-double-reg-hi-tn real)))
+	 (unless (location= a-real r-real)
+	   (inst movsd r-real a-real)))
+       (let ((r-real (complex-double-double-reg-real-lo-tn r))
+	     (a-real (double-double-reg-lo-tn real)))
+	 (unless (location= a-real r-real)
+	   (inst movsd r-real a-real)))
+       (let ((r-imag (complex-double-double-reg-imag-hi-tn r))
+	     (a-imag (double-double-reg-hi-tn imag)))
+	 (unless (location= a-imag r-imag)
+	   (inst movsd r-imag a-imag)))
+       (let ((r-imag (complex-double-double-reg-imag-lo-tn r))
+	     (a-imag (double-double-reg-lo-tn imag)))
+	 (unless (location= a-imag r-imag)
+	   (inst movsd r-imag a-imag))))
+      (complex-double-double-stack
+       (unless (location= real r)
+	 (inst movsd (ea-for-cddf-real-hi-stack r) real))
+       (let ((real-lo (double-double-reg-lo-tn real)))
+	 (inst movsd (ea-for-cddf-real-lo-stack r) real-lo))
+       (let ((imag-val (double-double-reg-hi-tn imag)))
+	 (inst movsd (ea-for-cddf-imag-hi-stack r) imag-val))
+       (let ((imag-val (double-double-reg-lo-tn imag)))
+	 (inst movsd (ea-for-cddf-imag-lo-stack r) imag-val))))))
+
+(define-vop (complex-double-double-float-value)
+  (:args (x :scs (complex-double-double-reg descriptor-reg) :target r
+	    :load-if (not (sc-is x complex-double-double-stack))))
+  (:arg-types complex-double-double-float)
+  (:results (r :scs (double-double-reg)))
+  (:result-types double-double-float)
+  (:variant-vars slot)
+  (:policy :fast-safe)
+  (:generator 3
+    (sc-case x
+      (complex-double-double-reg
+       (let ((value-tn (ecase slot
+			 (:real (complex-double-double-reg-real-hi-tn x))
+			 (:imag (complex-double-double-reg-imag-hi-tn x))))
+	     (r-hi (double-double-reg-hi-tn r)))
+	 (unless (location= value-tn r-hi)
+	   (inst movsd r-hi value-tn)))
+       (let ((value-tn (ecase slot
+			 (:real (complex-double-double-reg-real-lo-tn x))
+			 (:imag (complex-double-double-reg-imag-lo-tn x))))
+	     (r-lo (double-double-reg-lo-tn r)))
+	 (unless (location= value-tn r-lo)
+	   (inst movsd r-lo value-tn))))
+      (complex-double-double-stack
+       (let ((r-hi (double-double-reg-hi-tn r)))
+	 (inst movsd r-hi (ecase slot
+			    (:real (ea-for-cddf-real-hi-stack x))
+			    (:imag (ea-for-cddf-imag-hi-stack x)))))
+       (let ((r-lo (double-double-reg-lo-tn r)))
+	 (inst movsd r-lo (ecase slot
+			    (:real (ea-for-cddf-real-lo-stack x))
+			    (:imag (ea-for-cddf-imag-lo-stack x))))))
+      (descriptor-reg
+       (let ((r-hi (double-double-reg-hi-tn r)))
+	 (inst movsd r-hi (ecase slot
+			    (:real (ea-for-cddf-real-hi-desc x))
+			    (:imag (ea-for-cddf-imag-hi-desc x)))))
+       (let ((r-lo (double-double-reg-lo-tn r)))
+	 (inst movsd r-lo (ecase slot
+			    (:real (ea-for-cddf-real-lo-desc x))
+			    (:imag (ea-for-cddf-imag-lo-desc x)))))))))
+
+(define-vop (realpart/complex-double-double-float complex-double-double-float-value)
+  (:translate realpart)
+  (:note _N"complex float realpart")
+  (:variant :real))
+
+(define-vop (imagpart/complex-double-double-float complex-double-double-float-value)
+  (:translate imagpart)
+  (:note _N"complex float imagpart")
+  (:variant :imag))
+
+); progn
+
+
+;;; Vops for complex arithmetic.  These are usually much faster than
+;;; the compiler-generated code using deftransforms.
+
+;; Negate a complex
+(macrolet
+    ((negate-complex (type shift xor amount)
+       (let ((name (symbolicate "%NEGATE/COMPLEX-" type "-FLOAT"))
+	     (sc-type (symbolicate "COMPLEX-" type "-FLOAT"))
+	     (sc (symbolicate "COMPLEX-" type "-REG")))
+	 `(define-vop (,name)
+	    (:translate %negate)
+	    (:args (x :scs (,sc) :target r))
+	    (:arg-types ,sc-type)
+	    (:results (r :scs (,sc)))
+	    (:result-types ,sc-type)
+	    (:policy :fast-safe)
+	    (:temporary (:scs (,sc)) t0)
+	    (:generator 1
+	      (inst pcmpeqd t0 t0)	; all ones
+	      (inst ,shift t0 ,amount)	; #x8000...0000
+	      (unless (location= x r)
+		(inst movaps r x))
+	      (inst ,xor r t0))))))
+  (negate-complex single pslld xorps 31)
+  (negate-complex double psllq xorpd 63))
+
+;; Convert various number types to complex double-floats
+(macrolet
+    ((convert-complex (trans op to from)
+       (let ((name (symbolicate to "/" from))
+	     (from-sc (symbolicate from "-REG"))
+	     (from-type (symbolicate from "-FLOAT"))
+	     (to-sc (symbolicate to "-REG"))
+	     (to-type (symbolicate to "-FLOAT")))
+	 `(define-vop (,name)
+	   (:translate ,trans)
+	   (:args (x :scs (,from-sc) :target r))
+	   (:arg-types ,from-type)
+	   (:results (r :scs (,to-sc)))
+	   (:result-types ,to-type)
+	   (:policy :fast-safe)
+	   (:generator 1
+	     ;; NOTE: We don't have 128-bit aligned objects, so we
+	     ;; can't use the stack or descriptors here.
+	     (inst ,op r x))))))
+  (convert-complex %complex-double-float cvtps2pd complex-double complex-single)
+  (convert-complex %complex-single-float cvtpd2ps complex-single complex-double))
+
+(macrolet
+    ((convert-complex (trans op base-ea to from movinst)
+       (let ((name (symbolicate to "/" from))
+	     (from-sc (symbolicate from "-REG"))
+	     (from-sc-stack (symbolicate from "-STACK"))
+	     (from-type (symbolicate from "-FLOAT"))
+	     (to-sc (symbolicate to "-REG"))
+	     (to-type (symbolicate to "-FLOAT")))
+	 `(define-vop (,name)
+	   (:translate ,trans)
+	   (:args (x :scs (,from-sc ,from-sc-stack descriptor-reg)
+		   :target r))
+	   (:arg-types ,from-type)
+	   (:results (r :scs (,to-sc)))
+	   (:result-types ,to-type)
+	   (:temporary (:sc ,to-sc) temp)
+	   (:policy :fast-safe)
+	   (:generator 1
+	     (sc-case x
+	       (,from-sc
+		;; Need to make sure the imaginary part is zero
+		(cond ((location= x r)
+		       (inst xorps temp temp)
+		       (inst ,op temp x)
+		       (inst ,movinst r temp))
+		      (t
+		       (inst xorps r r)
+		       (inst ,op r x))))
+	       (,from-sc-stack
+		(inst xorps r r)
+		(inst ,op r (,(symbolicate "EA-FOR-" base-ea "-STACK") x)))
+	       (descriptor-reg
+		(inst xorps r r)
+		(inst ,op r (,(symbolicate "EA-FOR-" base-ea "-DESC") x)))))))))
+  (convert-complex %complex-double-float cvtss2sd sf complex-double single movapd)
+  (convert-complex %complex-single-float cvtsd2ss df complex-single double movaps))
+
+;; Add and subtract for two complex arguments
+(macrolet
+    ((generate (movinst opinst commutative)
+       `(cond
+	 ((location= x r)
+	  (inst ,opinst x y))
+	 ((and ,commutative (location= y r))
+	  (inst ,opinst y x))
+	 ((not (location= r y))
+	  (inst ,movinst r x)
+	  (inst ,opinst r y))
+	 (t
+	  (inst ,movinst tmp x)
+	  (inst ,opinst tmp y)
+	  (inst ,movinst r tmp))))
+     (complex-add/sub (op inst float-type cost &optional commutative)
+       (let* ((vop-name (symbolicate (symbol-name op) "/COMPLEX-" float-type "-FLOAT"))
+	      (c-type (symbolicate "COMPLEX-" float-type "-FLOAT"))
+	      (complex-reg (symbolicate "COMPLEX-" float-type "-REG")))
+	 ;; Note: It would probably improve things if we could use
+	 ;; memory operands, but we can't because the instructions
+	 ;; assumed 128-bit alignment, which we can't guarantee.
+	 `(define-vop (,vop-name)
+	   (:args (x :scs (,complex-reg) :target r)
+	          (y :scs (,complex-reg)))
+	   (:results (r :scs (,complex-reg)))
+	   (:arg-types ,c-type ,c-type)
+	   (:result-types ,c-type)
+	   (:policy :fast-safe)
+	   (:note _N"inline complex float arithmetic")
+	   (:translate ,op)
+	   (:temporary (:sc ,complex-reg) tmp)
+	   (:generator ,cost
+	     (generate movaps ,inst ,commutative))))))
+  (complex-add/sub + addps single 1 t)
+  (complex-add/sub + addpd double 1 t)
+  (complex-add/sub - subps single 1)
+  (complex-add/sub - subpd double 1))
+
+;; Add and subtract a complex and a float
+(macrolet
+    ((generate (movinst opinst)
+       `(cond
+	 ((location= x r)
+	  (inst ,opinst x rtmp))
+	 ((not (location= r rtmp))
+	  (inst ,movinst r x)
+	  (inst ,opinst r rtmp))
+	 (t
+	  (inst ,movinst tmp x)
+	  (inst ,opinst tmp rtmp)
+	  (inst ,movinst r tmp))))
+     (complex-op-float (size op fop base-ea cost)
+       (let ((vop-name (symbolicate "COMPLEX-" size "-FLOAT-"
+				    op
+				    "-" size "-FLOAT"))
+	     (complex-reg (symbolicate "COMPLEX-" size "-REG"))
+	     (real-reg (symbolicate size "-REG"))
+	     (c-type (symbolicate "COMPLEX-" size "-FLOAT"))
+	     (r-type (symbolicate size "-FLOAT"))
+	     (r-stack (symbolicate size "-STACK"))
+	     (ea-stack (symbolicate "EA-FOR-" base-ea "-STACK"))
+	     (ea-desc (symbolicate "EA-FOR-" base-ea "-DESC"))
+	     (loadinst (ecase size
+			 (single 'movss)
+			 (double 'movsd)))
+	     (movinst (ecase size
+			(single 'movaps)
+			(double 'movapd))))
+	 `(define-vop (,vop-name)
+	    (:args (x :scs (,complex-reg))
+	           (y :scs (,real-reg ,r-stack descriptor-reg)))
+	    (:results (r :scs (,complex-reg)))
+	    (:arg-types ,c-type ,r-type)
+	    (:result-types ,c-type)
+	    (:policy :fast-safe)
+	    (:note _N"inline complex float/float arithmetic")
+	    (:translate ,op)
+	    (:temporary (:sc ,complex-reg) tmp)
+	    (:temporary (:sc ,real-reg) rtmp)
+	    (:generator ,cost
+	      ;; Clear out high and low parts of temp, which will
+	      ;; eventually hold y.
+	      (inst xorpd rtmp rtmp)
+	      (sc-case y
+		(,real-reg
+		 (inst ,loadinst rtmp y)
+		 (generate ,movinst ,fop))
+		(,r-stack
+		 (let ((ea (,ea-stack y)))
+		   (inst ,loadinst rtmp ea)
+		   (generate ,movinst ,fop)))
+		(descriptor-reg
+		 (let ((ea (,ea-desc y)))
+		   (inst ,loadinst rtmp ea)
+		   (generate ,movinst ,fop)))))))))
+  (complex-op-float single + addps sf 1)
+  (complex-op-float single - subps sf 1)
+  (complex-op-float double + addpd df 1)
+  (complex-op-float double - subpd df 1))
+
+;; Add a float and a complex
+(macrolet
+    ((generate (movinst opinst)
+       `(cond
+	 ((location= x r)
+	  (inst ,opinst x rtmp))
+	 ((not (location= r y))
+	  (inst ,movinst r x)
+	  (inst ,opinst r rtmp))
+	 (t
+	  (inst ,movinst tmp x)
+	  (inst ,opinst tmp rtmp)
+	  (inst ,movinst r tmp))))
+     (complex-op-float (size op fop base-ea cost)
+       (let ((vop-name (symbolicate size "-FLOAT-"
+				    op
+				    "-" "COMPLEX-" size "-FLOAT"))
+	     (complex-reg (symbolicate "COMPLEX-" size "-REG"))
+	     (real-reg (symbolicate size "-REG"))
+	     (c-type (symbolicate "COMPLEX-" size "-FLOAT"))
+	     (r-type (symbolicate size "-FLOAT"))
+	     (r-stack (symbolicate size "-STACK"))
+	     (ea-stack (symbolicate "EA-FOR-" base-ea "-STACK"))
+	     (ea-desc (symbolicate "EA-FOR-" base-ea "-DESC"))
+	     (loadinst (ecase size
+			 (single 'movss)
+			 (double 'movsd)))
+	     (movinst (ecase size
+			(single 'movaps)
+			(double 'movapd))))
+	 `(define-vop (,vop-name)
+	    (:args (y :scs (,real-reg ,r-stack descriptor-reg))
+	           (x :scs (,complex-reg)))
+	    (:results (r :scs (,complex-reg)))
+	    (:arg-types ,r-type ,c-type)
+	    (:result-types ,c-type)
+	    (:policy :fast-safe)
+	    (:note _N"inline complex float/float arithmetic")
+	    (:translate ,op)
+	    (:temporary (:sc ,complex-reg) tmp)
+	    (:temporary (:sc ,real-reg) rtmp)
+	    (:generator ,cost
+	      (inst xorpd rtmp rtmp)
+	      (sc-case y
+		(,real-reg
+		 (inst ,loadinst rtmp y)
+		 (generate ,movinst ,fop))
+		(,r-stack
+		 (let ((ea (,ea-stack y)))
+		   (inst ,loadinst rtmp ea)
+		   (generate ,movinst ,fop)))
+		(descriptor-reg
+		 (let ((ea (,ea-desc y)))
+		   (inst ,loadinst rtmp ea)
+		   (generate ,movinst ,fop)))))))))
+  (complex-op-float single + addps sf 1)
+  (complex-op-float double + addpd df 1))
+
+;; Multiply a complex by a float or a float by a complex.
+(macrolet
+    ((complex-*-float (float-type fmul copy cost)
+       (let* ((vop-name (symbolicate "COMPLEX-"
+				     float-type
+				     "-FLOAT-*-"
+				     float-type
+				     "-FLOAT"))
+	      (vop-name-r (symbolicate float-type
+				       "-FLOAT-*-COMPLEX-"
+				       float-type
+				       "-FLOAT"))
+	      (complex-sc-type (symbolicate "COMPLEX-" float-type "-REG"))
+	      (real-sc-type (symbolicate float-type "-REG"))
+	      (c-type (symbolicate "COMPLEX-" float-type "-FLOAT"))
+	      (r-type (symbolicate float-type "-FLOAT")))
+	 `(progn
+	   ;; Complex * float
+	   (define-vop (,vop-name)
+	     (:args (x :scs (,complex-sc-type))
+	            (y :scs (,real-sc-type)))
+	     (:results (r :scs (,complex-sc-type)))
+	     (:arg-types ,c-type ,r-type)
+	     (:result-types ,c-type)
+	     (:policy :fast-safe)
+	     (:note _N"inline complex float arithmetic")
+	     (:translate *)
+	     (:temporary (:scs (,complex-sc-type)) t0)
+	     (:generator ,cost
+	       (inst movaps t0 y)	; t0 = y
+	       (inst ,copy t0 t0)	; t0 = y|y
+	       (unless (location= x r)
+		 (inst movaps r x))	; r = xi|xr
+	       (inst ,fmul r t0)))
+	   (define-vop (,vop-name-r)
+	     (:args (x :scs (,real-sc-type))
+	            (y :scs (,complex-sc-type)))
+	     (:results (r :scs (,complex-sc-type)))
+	     (:arg-types ,r-type ,c-type)
+	     (:result-types ,c-type)
+	     (:policy :fast-safe)
+	     (:note _N"inline complex float arithmetic")
+	     (:translate *)
+	     (:temporary (:scs (,complex-sc-type)) t0)
+	     (:generator ,cost
+	       (inst movaps t0 x)	; t0 = 0|x or 0|0|0|x
+	       (inst ,copy t0 t0)	; t0 = x|x or 0|0|x|x
+	       (unless (location= y r)
+		 (inst movaps r y))	; r = yi|yr or 0|0|yi|yr
+	       (inst ,fmul r t0)))))))
+  (complex-*-float single mulps unpcklps 4)
+  (complex-*-float double mulpd unpcklpd 4))
+
+;; Divide a complex by a real
+(define-vop (complex-double-float-/-double-float)
+  (:args (x :scs (complex-double-reg)) (y :scs (double-reg)))
+  (:results (r :scs (complex-double-reg)))
+  (:arg-types complex-double-float double-float)
+  (:result-types complex-double-float)
+  (:policy :fast-safe)
+  (:note _N"inline complex float arithmetic")
+  (:translate /)
+  (:temporary (:sc complex-double-reg) t0)
+  (:generator 4
+    (inst movaps t0 y)			; t0 = u|y
+    (inst unpcklpd t0 t0)		; t0 = y|y
+    (unless (location= x r)
+      (inst movaps r x))		; r = xi|xr
+    (inst divpd r t0)))
+
+(define-vop (complex-single-float-/-single-float)
+  (:args (x :scs (complex-single-reg)) (y :scs (single-reg)))
+  (:results (r :scs (complex-single-reg)))
+  (:arg-types complex-single-float single-float)
+  (:result-types complex-single-float)
+  (:policy :fast-safe)
+  (:note _N"inline complex float arithmetic")
+  (:translate /)
+  (:temporary (:sc complex-single-reg) t0 t1)
+  (:generator 5
+    ;; The upper parts of x may contain junk and dividing that by y
+    ;; may cause spurious signals.  Thus, copy the complex number to
+    ;; the high part.
+    (inst movaps t0 y)			; t0 = u|u|u|y
+    (inst shufps t0 t0 0)		; t0 = y|y|y|y
+    (inst movaps t1 x)			; t1 = u|u|xi|xr
+    (inst movlhps t1 t1)		; t1 = xi|xr|xi|xr
+    (inst divps t1 t0)
+    (inst xorps t0 t0)			; t0 = 0|0|0|0
+    (inst movaps r t1)
+    (inst movlhps r t0)))
+
+(define-vop (sse3-*/complex-double-float)
+  (:translate *)
+  (:args (x :scs (complex-double-reg))
+	 (y :scs (complex-double-reg)))
+  (:arg-types complex-double-float complex-double-float)
+  (:results (r :scs (complex-double-reg)))
+  (:result-types complex-double-float)
+  (:policy :fast-safe)
+  (:temporary (:scs (complex-double-reg)) t1 t2)
+  (:guard (backend-featurep :sse3))
+  (:generator 8
+    ;; Basic algorithm from the paper "The Microarchitecture of the
+    ;; Intel Pentium 4 Processor on 90nm Technololgy"
+    ;;
+    ;; This requires SSSE3 instructions (addsubpd, movddup).
+    ;;
+    ;; x = a + b*i.  In sse2 reg we have: b|a
+    ;; y = c + d*i.  In sse2 reg we have: d|c
+    (inst movddup t1 x)			; t1 = a|a
+    (inst mulpd t1 y)			; t1 = a*d|a*c
+    (inst movapd t2 x)			; t2 = b|a
+    (inst unpckhpd t2 t2)		; t2 = b|b
+    (inst mulpd t2 y)			; t2 = b*d|b*c
+    (inst shufpd t2 t2 1)		; t2 = b*c|b*d
+    (inst addsubpd t1 t2)		; t2 = a*d+b*c|a*c-b*d
+    (inst movapd r t1)))
+
+(define-vop (*/complex-double-float)
+  (:translate *)
+  (:args (x :scs (complex-double-reg))
+	 (y :scs (complex-double-reg)))
+  (:arg-types complex-double-float complex-double-float)
+  (:results (r :scs (complex-double-reg)))
+  (:result-types complex-double-float)
+  (:policy :fast-safe)
+  (:temporary (:scs (complex-double-reg)) t0 t1 t2)
+  (:temporary (:scs (unsigned-reg)) tmp)
+  (:generator 13
+    ;; Basic algorithm from the paper "The Microarchitecture of the
+    ;; Intel Pentium 4 Processor on 90nm Technololgy"
+
+    ;; x = a+b*i = b|a
+    ;; y = c+d*i = d|c
+    ;; r = a*c-b*d + i*(a*d+b*c)
+    (inst movapd t1 y)			; t1 = d|c
+    (inst movapd t2 y)			; t2 = d|c
+    (inst unpcklpd t1 t1)		; t1 = c|c
+    (inst unpckhpd t2 t2)		; t2 = d|d
+    (inst mulpd t1 x)			; t1 = b*c|a*c
+    (inst mulpd t2 x)			; t2 = b*d|a*d
+    (inst shufpd t2 t2 1)		; t2 = a*d|b*d
+    (inst mov tmp #x80000000)
+    (inst movd t0 tmp)			; t0 = 0|0|0|#x80000000
+    (inst psllq t0 32)			; t0 = 0|#x80000000,00000000
+    (inst xorpd t2 t0)			; t2 = a*d|-b*d
+    (inst addpd t2 t1)			; t2 = a*d+b*c | a*c-b*d
+    (inst movapd r t2)))
+
+
+(define-vop (*/complex-single-float)
+  (:translate *)
+  (:args (x :scs (complex-single-reg))
+	 (y :scs (complex-single-reg)))
+  (:arg-types complex-single-float complex-single-float)
+  (:results (r :scs (complex-single-reg)))
+  (:result-types complex-single-float)
+  (:policy :fast-safe)
+  (:temporary (:scs (complex-single-reg)) t0 t1 t2)
+  (:temporary (:scs (unsigned-reg)) tmp)
+  (:generator 14
+    ;; Basic algorithm from the paper "The Microarchitecture of the
+    ;; Intel Pentium 4 Processor on 90nm Technololgy"
+
+    ;; x = a+b*i = b|a
+    ;; y = c+d*i = d|c
+    ;; r = a*c-b*d + i*(a*d+b*c)
+    (inst movaps t1 y)			; t1 = u|u|d|c
+    (inst movaps t2 y)			; t2 = u|u|d|c
+    (inst shufps t1 t1 #b00000000)	; t1 = c|c|c|c
+    (inst shufps t2 t2 #b01010101)	; t2 = d|d|d|d
+    (inst mulps t1 x)			; t1 = b*c|a*c
+    (inst mulps t2 x)			; t2 = b*d|a*d
+    (inst shufps t2 t2 1)		; t2 = a*d|b*d
+    (inst mov tmp #x80000000)
+    (inst movd t0 tmp)			; t0 = 0|0|0|#x80000000
+    (inst xorps t2 t0)			; t2 = a*d|-b*d
+    (inst addps t2 t1)			; t2 = a*d+b*c | a*c-b*d
+    (inst xorps t1 t1)			; t1 = 0|0|0|0
+    (inst movaps r t2)
+    (inst movlhps r t1)))
+
+;; Conjugate
+(define-vop (conjugate/complex-double-float)
+  (:translate conjugate)
+  (:args (z :scs (complex-double-reg)))
+  (:arg-types complex-double-float)
+  (:results (r :scs (complex-double-reg)))
+  (:result-types complex-double-float)
+  (:policy :fast-safe)
+  (:temporary (:scs (complex-double-reg)) ztmp)
+  (:temporary (:scs (unsigned-reg)) tmp)
+  (:generator 2
+    (inst mov tmp #x80000000)
+    (inst movd ztmp tmp)
+    (inst psllq ztmp 32)		; ztmp = 0|#x80000000,00000000
+    (inst shufpd ztmp ztmp 1)		; ztmp = #x80000000,00000000|0
+    (inst xorpd ztmp z)			; ztmp = -xi|xi
+    (inst movapd r ztmp)))
+
+(define-vop (conjugate/complex-single-float)
+  (:translate conjugate)
+  (:args (z :scs (complex-single-reg)))
+  (:arg-types complex-single-float)
+  (:results (r :scs (complex-single-reg)))
+  (:result-types complex-single-float)
+  (:policy :fast-safe)
+  (:temporary (:scs (complex-single-reg)) ztmp)
+  (:temporary (:scs (unsigned-reg)) tmp)
+  (:generator 2
+    (inst mov tmp #x80000000)
+    (inst movd ztmp tmp)
+    (inst psllq ztmp 32)		; ztmp = #x80000000|0
+    (inst xorps ztmp z)			; ztmp = -xi|xr
+    (inst movaps r ztmp)))


=====================================
src/compiler/amd64/macros.lisp
=====================================
@@ -130,10 +130,10 @@
 	      (n-offset offset))
     (ecase (backend-byte-order *target-backend*)
       (:little-endian
-       `(inst mov ,n-target
+       `(inst movzx ,n-target
 	      (make-ea :byte :base ,n-source :disp ,n-offset)))
       (:big-endian
-       `(inst mov ,n-target
+       `(inst movzx ,n-target
 	      (make-ea :byte :base ,n-source :disp (+ ,n-offset 3)))))))
 
 (defmacro load-foreign-data-symbol (reg name )


=====================================
src/compiler/amd64/vm.lisp
=====================================
@@ -268,8 +268,7 @@
 
   ;; Non-Descriptor characters
   (base-char-reg registers
-		 :locations #.byte-regs
-		 :reserve-locations (#.ah-offset #.al-offset)
+		 :locations #.dword-regs
 		 :constant-scs (immediate)
 		 :save-p t
 		 :alternate-scs (base-char-stack))
@@ -385,12 +384,13 @@
 
 (eval-when (compile load eval)
 
-(defconstant byte-sc-names '(base-char-reg byte-reg base-char-stack))
+(defconstant byte-sc-names '(byte-reg))
 (defconstant word-sc-names '(word-reg))
 (defconstant dword-sc-names '(dword-reg))
 (defconstant qword-sc-names
   '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack
-    signed-stack unsigned-stack sap-stack single-stack constant))
+    signed-stack unsigned-stack sap-stack single-stack constant
+    base-char-reg base-char-stack))
 
 ;;;
 ;;; added by jrd.  I guess the right thing to do is to treat floats


=====================================
src/compiler/generic/objdef.lisp
=====================================
@@ -123,13 +123,13 @@
   single-float
   double-float
   #+long-float long-float
-  #+#.(c:target-featurep :double-double)
+  #+double-double
   double-double-float
   complex
   complex-single-float
   complex-double-float
   #+long-float complex-long-float
-  #+#.(c:target-featurep :double-double)
+  #+double-double
   complex-double-double-float
   
   simple-array
@@ -148,12 +148,12 @@
   simple-array-single-float
   simple-array-double-float
   #+long-float simple-array-long-float
-  #+#.(c:target-featurep :double-double)
+  #+double-double
   simple-array-double-double-float
   simple-array-complex-single-float
   simple-array-complex-double-float
   #+long-float simple-array-complex-long-float
-  #+#.(c:target-featurep :double-double)
+  #+double-double
   simple-array-complex-double-double-float
   complex-string
   complex-bit-vector


=====================================
src/tools/cross-scripts/cross-x86-amd64.lisp
=====================================
@@ -206,6 +206,7 @@
 	))
 
 (in-package :vm)
+(defvar *num-fixups* 0)
 (defun fixup-code-object (code offset fixup kind)
   (declare (type index offset))
   (flet ((add-fixup (code offset)



View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/1459e4fdb40ba5bab7a3d739de5645d979a74745...894977db1db8740a916f45b2a5c31bbf1f2e00f6

-- 
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/1459e4fdb40ba5bab7a3d739de5645d979a74745...894977db1db8740a916f45b2a5c31bbf1f2e00f6
You're receiving this email because of your account on gitlab.common-lisp.net.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/cmucl-cvs/attachments/20200412/44d0f7d4/attachment-0001.htm>


More information about the cmucl-cvs mailing list