[oct-cvs] Oct commit: oct qd-extra.lisp

rtoy rtoy at common-lisp.net
Fri Oct 26 15:48:15 UTC 2007


Update of /project/oct/cvsroot/oct
In directory clnet:/tmp/cvs-serv1229

Modified Files:
	qd-extra.lisp 
Log Message:
Add Pade approximation for exp.  From Richard Fateman.

Experiments show that it is 25% faster than exp-qd/reduce.  However,
it loses 3 bits of accuracy compared with exp-qd/reduce.


--- /project/oct/cvsroot/oct/qd-extra.lisp	2007/10/15 18:54:02	1.4
+++ /project/oct/cvsroot/oct/qd-extra.lisp	2007/10/26 15:48:15	1.5
@@ -1018,6 +1018,71 @@
       (mul-qd +cordic-scale+ y))))
 
 
+;; Evaluate the polynomial poly at the point q.  The polynomial is
+;; a list of the coefficients arranged in descending powers.
+(defun poly-eval-qd (q poly)
+  (let ((sum (%make-qd-d 0d0 0d0 0d0 0d0)))
+    (dolist (c poly)
+      (setf sum (add-qd (mul-qd q sum) c)))
+    sum))
+
+;; Like poly-eval-qd, except the polynomial is a list of double-floats
+(defun poly-eval-qd-d (q poly)
+  (let ((sum (%make-qd-d 0d0 0d0 0d0 0d0)))
+    (dolist (c poly)
+      (setf sum (add-qd-d (mul-qd q sum) c)))
+    sum))
+
+
+;; This idea is from Richard Fateman.  I've added a few additional
+;; notes, but these are mostly from Richard.
+;;
+;; We can evaluate exp(x) for |x|< 0.01, say, by a pade approximation,
+;; computed using Maxima:
+;;
+;; taylor(exp(x), x, 0, 20)$
+;;
+;; pade(%, 10, 10) ->
+;;
+;; [(x^10+110*x^9+5940*x^8+205920*x^7+5045040*x^6+90810720*x^5+1210809600*x^4
+;;  +11762150400*x^3+79394515200*x^2+335221286400*x^+670442572800)
+;;  /(x^10-110*x^9+
+;;  5940*x^8-205920*x^7+5045040*x^6-90810720*x^5+1210809600*x^4-11762150400*x^3+
+;;  79394515200*x^2-335221286400*x^+670442572800)]
+;;
+;; The numerator and denominator have the same coefficients with
+;; different signs on odd terms.  so we note that num and den here can
+;; be evaluated as f(x^2)+x*g(x^2) and f(x^2)-x*g(x^2) respectively using
+;; half the number of multiplies.
+
+(defconstant f-exp-pade
+  '(1.0d0 5940.0d0 5045040.0d0  1.2108096d+9
+    7.93945152d+10 6.704425728d+11))
+
+(defconstant g-exp-pade
+  '(110.0d0  205920.0d0  9.081072d+7 
+    1.17621504d+10  3.352212864d+11 ))
+
+(defun exp-qd/pade-small (x)
+  ;; exp(x) = (f(x^2) + x*g(x^2))/(f(x^2) - x*g(x^2))
+  (let* ((x^2 (sqr-qd x))
+	 (fx (poly-eval-qd-d x^2 f-exp-pade))
+	 (x*gx (mul-qd x (poly-eval-qd-d x^2 g-exp-pade))))
+    (div-qd (add-qd fx x*gx)
+	    (sub-qd fx x*gx))))
+
+(defun exp-qd/pade (a)
+  ;; Same idea as in exp-qd/reduce, except we use our Pade
+  ;; approximation instead of a Taylor series.
+  (let* ((k 256)
+	 (z (truncate (qd-0 (nint-qd (div-qd a +qd-log2+)))))
+	 (r (div-qd-d (sub-qd a (mul-qd-d +qd-log2+ (float z 1d0)))
+		      (float k 1d0)))
+  
+	 (e (exp-qd/pade-small r)))
+    (scale-float-qd (npow e k) z)))
+
+
 ;; Some timing and consing tests.
 ;;
 ;; The tests are run using the following:
@@ -1035,11 +1100,13 @@
 ;; exp-qd/reduce	2.06	 3.18	10.46	2.76		 6.12
 ;; expm1-qd/series	8.81	12.24	18.87	3.26		29.0
 ;; expm1-qd/dup		5.68	 4.34	18.47	3.64		18.78
+;; exp-qd/pade          1.53
 ;;
 ;; Consing (MB)		Sparc
 ;; exp-qd/reduce	 45   	 45   	 638   	44.4   		 45
 ;; expm1-qd/series	519   	519   	1201  	14.8   		519
 ;; expm1-qd/dup		 32   	 32   	1224   	32.0   		 32
+;; exp-qd/pade           44
 ;;
 ;; Speeds seem to vary quite a bit between architectures.
 ;;
@@ -1066,17 +1133,22 @@
 	   (fixnum n))
   (let ((y +qd-zero+))
     (declare (type %quad-double y))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "exp-qd/reduce~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (exp-qd/reduce x))))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
+    (format t "exp-qd/pade~%")
+    (time (dotimes (k n)
+	    (declare (fixnum k))
+	    (setf y (exp-qd/pade x))))
+    #+cmu (ext:gc :full t)
     (format t "expm1-qd/series~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (expm1-qd/series x))))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "expm1-qd/duplication~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
@@ -1084,6 +1156,87 @@
   
     ))
 
+#||
+
+;; This bit of code is meant to be run with Clisp, which has arbitrary
+;; precision long floats.
+;;
+;; We use this to compare the accuracy of exp-qd/reduce and
+;; exp-qd/pade against Clisp's exp implementation.
+;;
+;; Some results:
+;;
+;; k = 159000
+;; pade min   =       4.733L-68
+;; pade max   =       5.153L-61
+;; pade sumsq =      7.899L-119
+;; red min    =       3.838L-69
+;; red max    =       4.318L-62
+;; red sumsq  =      6.076L-120
+;;
+;; So, worst case error is:
+;;
+;; exp-qd/reduce: 4.318L-62 (203.8 bits)
+;; exp-qd/pade:   5.153L-61 (200.3 bits)
+;;
+;; We lose 3 bits for Pade.  exp-qd/reduce is "missing" 8 bits of
+;; accuracy.  Bummer.
+
+(in-package #:oct)
+
+;; 240 bit fractions for long-floats.  I hope this is accurate enough
+;; compared against our 212 bit fractions for quad-doubles.
+(setf (ext:long-float-digits) 240)
+(defun compare-exp (n &optional verbose)
+  (let ((pade-max -1L0)
+	(pade-min 1L100)
+	(pade-sum 0L0)
+	(red-max -1L0)
+	(red-min 1L100)
+	(red-sum 0L0))
+    (dotimes (k n)
+      (let* ((f (random (make-qd 1/256)))
+	     (pade-exp (octi::exp-qd/pade (qd-value f)))
+	     (red-exp (octi::exp-qd/reduce (qd-value f))))
+	(flet ((qd-to-lf (qd)
+		 (octi:with-qd-parts (q0 q1 q2 q3)
+		     qd
+		   (+ (float q0 1L0)
+		      (float q1 1L0)
+		      (float q2 1L0)
+		      (float q3 1L0)))))
+	  (let* ((lf (qd-to-lf (qd-value f)))
+		 (l-exp (exp lf))
+		 (pade-exp-lf (qd-to-lf pade-exp))
+		 (red-exp-lf (qd-to-lf red-exp))
+		 (pade-diff (abs (- l-exp pade-exp-lf)))
+		 (red-diff (abs (- l-exp red-exp-lf))))
+	    (when verbose
+	      (format t "f     = ~S~%" f)
+	      (format t "pade-exp = ~A~%" (make-instance 'qd-real :value pade-exp))
+	      (format t "l-exp =   ~A~%" l-exp)
+	      (format t "diff  = ~A~%" pade-diff))
+	    (when (and (plusp k) (zerop (mod k 1000)))
+	      (format t "k = ~A~%" k)
+	      (format t "pade min   = ~15,3g~%" pade-min)
+	      (format t "pade max   = ~15,3g~%" pade-max)
+	      (format t "pade sumsq = ~15,3g~%" pade-sum)
+	      (format t "red min    = ~15,3g~%" red-min)
+	      (format t "red max    = ~15,3g~%" red-max)
+	      (format t "red sumsq  = ~15,3g~%" red-sum))
+	    (setf pade-max (max pade-max pade-diff))
+	    (setf pade-min (min pade-min pade-diff))
+	    (incf pade-sum (* pade-diff pade-diff))
+	    (setf red-max (max red-max red-diff))
+	    (setf red-min (min red-min red-diff))
+	    (incf red-sum (* red-diff red-diff)))
+	  )))
+  (values pade-max pade-min pade-sum
+	  red-max red-min red-sum
+	  )))
+	  
+||#
+
 ;; (time-log #c(3w0 0) 50000)
 ;;
 ;; Time (s)		Sparc	PPC	x86	PPC (fma)	Sparc2
@@ -1147,33 +1300,33 @@
 	   (fixnum n))
   (let ((y +qd-zero+))
     (declare (type %quad-double y))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "log-qd/newton~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (log-qd/newton x))))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "log1p-qd/duplication~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (log1p-qd/duplication x))))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "log-qd/agm~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (log-qd/agm x))))
   
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "log-qd/agm2~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (log-qd/agm2 x))))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "log-qd/agm3~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (log-qd/agm3 x))))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "log-qd/halley~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
@@ -1217,17 +1370,17 @@
 	   (fixnum n))
   (let ((y +qd-zero+)
 	(one +qd-one+))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "atan2-qd/newton~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (atan2-qd/newton x one))))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "atan2-qd/cordic~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (atan2-qd/cordic x one))))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "atan-qd/duplication~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
@@ -1260,12 +1413,12 @@
   (declare (type %quad-double x)
 	   (fixnum n))
   (let ((y +qd-zero+))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "tan-qd/cordic~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))
 	    (setf y (tan-qd/cordic x))))
-    #+cmu (gc :full t)
+    #+cmu (ext:gc :full t)
     (format t "tan-qd/sincos~%")
     (time (dotimes (k n)
 	    (declare (fixnum k))




More information about the oct-cvs mailing list