[cmucl-cvs] [git] CMU Common Lisp branch file-attribute created. snapshot-2012-06-4-g81f65db
Raymond Toy
rtoy at common-lisp.net
Tue Jun 26 03:47:33 UTC 2012
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CMU Common Lisp".
The branch, file-attribute has been created
at 81f65db452ae1c7c1b8907d10f5bb8bbd4cff37f (commit)
- Log -----------------------------------------------------------------
commit 81f65db452ae1c7c1b8907d10f5bb8bbd4cff37f
Merge: 7ed7451 8a9d1d8
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Mon Jun 25 20:46:20 2012 -0700
Merge branch 'master' into file-attribute
commit 7ed745111ae7a478ba867db257a1888d2948b524
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Thu May 24 21:39:39 2012 -0700
First cut at :file-attribute external format (mostly from Douglas.)
diff --git a/src/code/exports.lisp b/src/code/exports.lisp
index a46d5bb..d0cfe58 100644
--- a/src/code/exports.lisp
+++ b/src/code/exports.lisp
@@ -1511,6 +1511,7 @@
"DESCRIBE-EXTERNAL-FORMAT")
;; Unicode
(:export "STRING-TO-OCTETS" "OCTETS-TO-STRING" "*DEFAULT-EXTERNAL-FORMAT*"
+ "*DEFAULT-SOURCE-EXTERNAL-FORMAT*"
"DESCRIBE-EXTERNAL-FORMAT"
"LIST-ALL-EXTERNAL-FORMATS"
"STRING-ENCODE" "STRING-DECODE"
diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp
index 45b5847..e854a73 100644
--- a/src/code/fd-stream.lisp
+++ b/src/code/fd-stream.lisp
@@ -1362,6 +1362,169 @@
;;;; Utility functions (misc routines, etc)
+(defvar *stream-encoding-file-attribute-translations*
+ '(;; Emacs specific codings.
+ (:iso8859-1 "latin-1" "latin-1-unix" "iso-latin-1-unix" "iso-8859-1-unix")
+ (:utf-8 "utf-8-unix")
+ (:euc-jp "euc-jp-unix")
+ )
+ "List of coding translations used by 'stream-encoding-file-attribute to map
+ the read file coding into a native external-format. Each element is a list of
+ a native external-format followed byte a list of coding strings that are to be
+ mapped to this native format.")
+
+;;; stream-encoding-file-attribute -- Internal
+;;;
+;;; Read the encoding file option from the stream 's which is expected to be a
+;;; character stream with an external-format of :iso8859-1.
+;;;
+(defun stream-encoding-file-attribute (s)
+ (let* ((initial-encoding nil)
+ (declared-encoding nil)
+ (buffer (make-array 1024 :element-type '(unsigned-byte 8)))
+ (available (do ((i 0 (1+ i)))
+ ((>= i 1024) i)
+ (declare (fixnum i))
+ (let ((ch (read-char s nil nil)))
+ (unless ch (return i))
+ (setf (aref buffer i) (char-code ch))))))
+ (labels ((decode-ascii (start size offset)
+ (declare (type fixnum start)
+ (type (integer 1 4) size)
+ (type (integer 0 3) offset))
+ (let ((ascii (make-array 64 :element-type 'character
+ :adjustable t :fill-pointer 0)))
+ (do ()
+ ((< available (+ start size)))
+ (let* ((code (ecase size
+ (1 (aref buffer start))
+ (2 (let ((b0 (aref buffer start))
+ (b1 (aref buffer (1+ start))))
+ (ecase offset
+ (0 (logior (ash b1 8) b0))
+ (1 (logior (ash b0 8) b1)))))
+ (4
+ (let ((b0 (aref buffer start))
+ (b1 (aref buffer (+ start 1)))
+ (b2 (aref buffer (+ start 2)))
+ (b3 (aref buffer (+ start 3))))
+ (ecase offset
+ (0 (logior (ash b3 24) (ash b2 16) (ash b1 8) b0))
+ (1 (logior (ash b1 24) (ash b0 16) (ash b3 8) b2))
+ (2 (logior (ash b2 24) (ash b3 16) (ash b0 8) b1))
+ (3 (logior (ash b0 24) (ash b1 16) (ash b2 8) b3))))))))
+ (incf start size)
+ (let ((ch (if (< 0 code #x80) (code-char code) #\?)))
+ (vector-push-extend ch ascii))))
+ ascii))
+ (parse-file-option (ascii)
+ ;; Parse the file options.
+ (let ((found (search "-*-" ascii))
+ (options nil))
+ (when found
+ (block do-file-options
+ (let* ((start (+ found 3))
+ (end (search "-*-" ascii :start2 start)))
+ (unless end
+ (return-from do-file-options))
+ (unless (find #\: ascii :start start :end end)
+ (return-from do-file-options))
+ (do ((opt-start start (1+ semi)) colon semi)
+ (nil)
+ (setf colon (position #\: ascii :start opt-start :end end))
+ (unless colon
+ (return-from do-file-options))
+ (setf semi (or (position #\; ascii :start colon :end end) end))
+ (let ((option (string-trim '(#\space #\tab)
+ (subseq ascii opt-start colon)))
+ (value (string-trim '(#\space #\tab)
+ (subseq ascii (1+ colon) semi))))
+ (push (cons option value) options)
+ (when (= semi end) (return nil)))))))
+ (setf declared-encoding
+ (cond ((cdr (assoc "external-format" options :test 'equalp)))
+ ((cdr (assoc "encoding" options :test 'equalp)))
+ ((cdr (assoc "coding" options :test 'equalp))))))))
+ (cond ((>= available 4)
+ (let ((b1 (aref buffer 0))
+ (b2 (aref buffer 1))
+ (b3 (aref buffer 2))
+ (b4 (aref buffer 3)))
+ (cond ((and (= b1 #x00) (= b2 #x00) (= b3 #xFE) (= b4 #xFF))
+ (setf initial-encoding :ucs-4be)
+ (parse-file-option (decode-ascii 4 4 3)))
+ ((and (= b1 #xff) (= b2 #xfe))
+ (cond ((and (= b3 #x00) (= b4 #x00))
+ (setf initial-encoding :ucs-4le)
+ (parse-file-option (decode-ascii 4 4 0)))
+ (t
+ (setf initial-encoding :utf-16le)
+ (parse-file-option (decode-ascii 2 2 0)))))
+ ((and (= b1 #x00) (= b2 #x00) (= b3 #xFF) (= b4 #xFE))
+ (parse-file-option (decode-ascii 4 4 2)))
+ ((and (= b1 #xfe) (= b2 #xff))
+ (cond ((and (= b3 #x00) (= b4 #x00))
+ (parse-file-option (decode-ascii 4 4 1)))
+ (t
+ (setf initial-encoding :utf-16be)
+ (parse-file-option (decode-ascii 2 2 1)))))
+ ((and (= b1 #xEF) (= b2 #xBB) (= b3 #xBF))
+ (setf initial-encoding :utf-8))
+ ((and (> b1 0) (= b2 0) (= b3 0) (= b4 0))
+ (setf initial-encoding :ucs-4le)
+ (parse-file-option (decode-ascii 0 4 0)))
+ ((and (= b1 0) (> b2 0) (= b3 0) (= b4 0))
+ (parse-file-option (decode-ascii 0 4 1)))
+ ((and (= b1 0) (= b2 0) (> b3 0) (= b4 0))
+ (parse-file-option (decode-ascii 0 4 2)))
+ ((and (= b1 0) (= b2 0) (= b3 0) (> b4 0))
+ (setf initial-encoding :ucs-4be)
+ (parse-file-option (decode-ascii 0 4 3)))
+ ((and (> b1 0) (= b2 0) (> b3 0) (= b4 0))
+ (setf initial-encoding :utf-16le)
+ (parse-file-option (decode-ascii 0 2 0)))
+ ((and (= b1 0) (> b2 0) (= b3 0) (> b4 0))
+ (setf initial-encoding :utf-16be)
+ (parse-file-option (decode-ascii 0 2 1)))
+ ((and (= b1 #x2B) (= b2 #x41)
+ (or (= b3 #x43) (= b3 #x44)))
+ (setf initial-encoding :utf-7))
+ ((and (= b1 #x2F) (= b2 #x2B) (= b3 #x41))
+ (setf initial-encoding :utf-7))
+ (t
+ (parse-file-option (decode-ascii 0 1 0))))))
+ ((= available 3)
+ (when (and (= (aref buffer 0) #xEF)
+ (= (aref buffer 1) #xBB)
+ (= (aref buffer 2) #xBF))
+ (setf initial-encoding :utf-8)))
+ ((= available 2)
+ (let ((b1 (aref buffer 0))
+ (b2 (aref buffer 1)))
+ (cond ((and (= b1 #xff) (= b2 #xfe))
+ (setf initial-encoding :utf-16le))
+ ((and (= b1 #xfe) (= b2 #xff))
+ (setf initial-encoding :utf-16be)))))))
+ ;;
+ ;;
+ (cond ((and (not initial-encoding) (not declared-encoding))
+ :default)
+ (t
+ (let ((encoding (or declared-encoding initial-encoding)))
+ (when (stringp encoding)
+ (setf encoding (string-upcase encoding))
+ (dolist (translations *stream-encoding-file-attribute-translations*)
+ (when (member encoding (rest translations) :test 'equalp)
+ (setf encoding (first translations))
+ (return))))
+ (let ((external-format
+ (cond ((eq encoding :default) :default)
+ ((stringp encoding)
+ (intern encoding :keyword))
+ (t
+ encoding))))
+ external-format))))))
+
;;; SET-ROUTINES -- internal
;;;
;;; Fill in the various routine slots for the given type. Input-p and
@@ -1916,20 +2079,7 @@
(setf (fd-stream-flags stream) #b001))
(t
(setf (fd-stream-flags stream) #b010)))
-
- ;; FIXME: setting the external format here should be better
- ;; integrated into set-routines. We do it before so that
- ;; set-routines can create an in-buffer if appropriate. But we
- ;; need to do it after to put the correct input routines for the
- ;; external format.
;;
- ;;#-unicode-bootstrap ; fails in stream-reinit otherwise
- #+(and unicode (not unicode-bootstrap))
- (%set-fd-stream-external-format stream external-format nil)
- (set-routines stream element-type input output input-buffer-p
- :binary-stream-p binary-stream-p)
- #+(and unicode (not unicode-bootstrap))
- (%set-fd-stream-external-format stream external-format nil)
(when (and auto-close (fboundp 'finalize))
(finalize stream
#'(lambda ()
@@ -1937,6 +2087,46 @@
(format *terminal-io* (intl:gettext "** Closed ~A~%") name)
(when original
(revert-file file original)))))
+ ;;
+ ;; FIXME: setting the external format here should be better
+ ;; integrated into set-routines. We do it before so that
+ ;; set-routines can create an in-buffer if appropriate. But we
+ ;; need to do it after to put the correct input routines for the
+ ;; external format.
+ ;;
+ ;;#-unicode-bootstrap ; fails in stream-reinit otherwise
+ #+(and unicode (not unicode-bootstrap))
+ (cond ((and (eq external-format :file-attribute) input)
+ ;; Read the encoding file option with the external-format set to
+ ;; :iso8859-1, and then change the external-format if necessary.
+ #+(and unicode (not unicode-bootstrap))
+ (%set-fd-stream-external-format stream :iso8859-1 nil)
+ (set-routines stream element-type input output input-buffer-p
+ :binary-stream-p binary-stream-p)
+ #+(and unicode (not unicode-bootstrap))
+ (%set-fd-stream-external-format stream :iso8859-1 nil)
+ (let ((encoding (stream-encoding-file-attribute stream)))
+ (unless (file-position stream :start)
+ (error (intl:gettext "The ~A external-format requires a file stream.")
+ external-format))
+ (unless (member encoding '(:iso8859-1 :iso-8859-1))
+ (setf (stream-external-format stream) (or encoding :default)))))
+ ((eq external-format :file-attribute)
+ ;; Non-input stream, so can not read the file attributes, so use the
+ ;; :default.
+ #+(and unicode (not unicode-bootstrap))
+ (%set-fd-stream-external-format stream :default nil)
+ (set-routines stream element-type input output input-buffer-p
+ :binary-stream-p binary-stream-p)
+ #+(and unicode (not unicode-bootstrap))
+ (%set-fd-stream-external-format stream :default nil))
+ (t
+ #+(and unicode (not unicode-bootstrap))
+ (%set-fd-stream-external-format stream external-format nil)
+ (set-routines stream element-type input output input-buffer-p
+ :binary-stream-p binary-stream-p)
+ #+(and unicode (not unicode-bootstrap))
+ (%set-fd-stream-external-format stream external-format nil)))
stream))
diff --git a/src/code/load.lisp b/src/code/load.lisp
index 832853b..89f7705 100644
--- a/src/code/load.lisp
+++ b/src/code/load.lisp
@@ -19,7 +19,7 @@
(in-package "EXTENSIONS")
(export '(*load-if-source-newer* *load-source-types* *load-object-types*
- invalid-fasl))
+ invalid-fasl *default-source-external-format*))
(in-package "SYSTEM")
(export '(foreign-symbol-address alternate-get-global-address))
@@ -94,6 +94,12 @@
(invalid-fasl-pathname condition)
(invalid-fasl-version condition)
(invalid-fasl-expected-version condition)))))
+
+(defvar *default-source-external-format* :default
+ "The external-format that 'load and 'compile-file use when given an
+ external-format of :default. The default value is :default which will open
+ the file using the 'ext:*default-external-format*")
+
;;; LOAD-FRESH-LINE -- internal.
;;;
@@ -523,6 +529,10 @@
defaulting. Probably only necessary if you have source files with a
\"fasl\" type.
+ :EXTERNAL-FORMAT
+ The external-format to use when opening the FILENAME. The default is
+ :default which uses the EXT:*DEFAULT-SOURCE-EXTERNAL-FORMAT*.
+
The variables *LOAD-VERBOSE*, *LOAD-PRINT* and EXT:*LOAD-IF-SOURCE-NEWER*
determine the defaults for the corresponding keyword arguments. These
variables are also bound to the specified argument values, so specifying a
@@ -604,6 +614,8 @@
(*load-pathname* pathname))
(case contents
(:source
+ (when (eq external-format :default)
+ (setf external-format *default-source-external-format*))
(with-open-file (file truename :external-format external-format
:direction :input
:if-does-not-exist if-does-not-exist)
diff --git a/src/compiler/main.lisp b/src/compiler/main.lisp
index 544ccb0..066ff2d 100644
--- a/src/compiler/main.lisp
+++ b/src/compiler/main.lisp
@@ -738,12 +738,12 @@
:write-date (file-write-date x)
:language :lisp))
files)))
-
+ (when (eq external-format :default)
+ (setf external-format *default-source-external-format*))
(make-source-info :files file-info
:current-file file-info
#+unicode :external-format
- #+unicode (stream::ef-name
- (stream::find-external-format external-format))
+ #+unicode external-format
#+unicode :decoding-error
#+unicode decoding-error)))
diff --git a/src/i18n/locale/cmucl.pot b/src/i18n/locale/cmucl.pot
index 5b38108..bb807f4 100644
--- a/src/i18n/locale/cmucl.pot
+++ b/src/i18n/locale/cmucl.pot
@@ -9187,6 +9187,16 @@ msgid "Error reading ~S: ~A"
msgstr ""
#: src/code/fd-stream.lisp
+msgid ""
+"List of coding translations used by 'stream-encoding-file-attribute to map\n"
+" the read file coding into a native external-format. Each element is a "
+"list of\n"
+" a native external-format followed byte a list of coding strings that are "
+"to be\n"
+" mapped to this native format."
+msgstr ""
+
+#: src/code/fd-stream.lisp
msgid "Could not find any input routine for ~S"
msgstr ""
@@ -9263,6 +9273,10 @@ msgid "** Closed ~A~%"
msgstr ""
#: src/code/fd-stream.lisp
+msgid "The ~A external-format requires a file stream."
+msgstr ""
+
+#: src/code/fd-stream.lisp
msgid ""
"This is a string that OPEN tacks on the end of a file namestring to produce\n"
" a name for the :if-exists :rename-and-delete and :rename options. Also,\n"
@@ -10064,6 +10078,14 @@ msgid ""
msgstr ""
#: src/code/load.lisp
+msgid ""
+"The external-format that 'load and 'compile-file use when given an\n"
+" external-format of :default. The default value is :default which will "
+"open\n"
+" the file using the 'ext:*default-external-format*"
+msgstr ""
+
+#: src/code/load.lisp
msgid "List of free fop tables for the fasloader."
msgstr ""
@@ -10133,6 +10155,10 @@ msgid ""
" defaulting. Probably only necessary if you have source files with a\n"
" \"fasl\" type. \n"
"\n"
+" :EXTERNAL-FORMAT\n"
+" The external-format to use when opening the FILENAME. The default is\n"
+" :default which uses the EXT:*DEFAULT-SOURCE-EXTERNAL-FORMAT*.\n"
+"\n"
" The variables *LOAD-VERBOSE*, *LOAD-PRINT* and EXT:*LOAD-IF-SOURCE-NEWER"
"*\n"
" determine the defaults for the corresponding keyword arguments. These\n"
-----------------------------------------------------------------------
hooks/post-receive
--
CMU Common Lisp
More information about the cmucl-cvs
mailing list