[Git][cmucl/cmucl][master] 2 commits: Address #139: Set filename encoding to :utf-8

Raymond Toy (@rtoy) gitlab at common-lisp.net
Fri Jan 13 23:33:57 UTC 2023



Raymond Toy pushed to branch master at cmucl / cmucl


Commits:
dbdec3a5 by Raymond Toy at 2023-01-13T23:33:47+00:00
Address #139: Set filename encoding to :utf-8

- - - - -
d004986e by Raymond Toy at 2023-01-13T23:33:49+00:00
Merge branch 'issue-139-set-filename-encoding-to-utf8' into 'master'

Address #139: Set filename encoding to :utf-8

See merge request cmucl/cmucl!109
- - - - -


4 changed files:

- src/code/extfmts.lisp
- src/code/save.lisp
- src/code/unix.lisp
- tests/issues.lisp


Changes:

=====================================
src/code/extfmts.lisp
=====================================
@@ -370,8 +370,10 @@
 		    #() '())))))
 
 (defun load-external-format-aliases ()
+  ;; Set filename encoding to NIL to bypass any encoding; it's not
+  ;; needed to open the aliases file.  NIL means the pathname string is passed as is, where only the low 8 bits of each UTF-16 code unit are used.
   (let ((*package* (find-package "KEYWORD"))
-	(unix::*filename-encoding* :iso8859-1))
+	(unix::*filename-encoding* nil))
     (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil
 			 :external-format :iso8859-1)
       (when stm
@@ -486,11 +488,16 @@
       (and (consp name) (find-external-format name))
       (and (with-standard-io-syntax
 	     ;; Use standard IO syntax so that changes by the user
-	     ;; don't mess up compiling the external format.
-	     (let ((*package* (find-package "STREAM"))
-		   (lisp::*enable-package-locked-errors* nil)
-		   (s (open (format nil "ext-formats:~(~A~).lisp" name)
-			    :if-does-not-exist nil :external-format :iso8859-1)))
+	     ;; don't mess up compiling the external format, but we
+	     ;; don't need to print readably.  Also, set filename
+	     ;; encoding to NIL because we don't need any special
+	     ;; encoding to open the format files.
+	     (let* ((*print-readably* nil)
+		    (unix::*filename-encoding* nil)
+		    (*package* (find-package "STREAM"))
+		    (lisp::*enable-package-locked-errors* nil)
+		    (s (open (format nil "ext-formats:~(~A~).lisp" name)
+			     :if-does-not-exist nil :external-format :iso8859-1)))
 	       (when s
 		 (null (nth-value 1 (ext:compile-from-stream s))))))
            (gethash name *external-formats*))))


=====================================
src/code/save.lisp
=====================================
@@ -164,7 +164,35 @@
 		 *default-external-format*))))
   (values))
 
- 
+(defun decode-runtime-strings (locale file-locale)
+  ;; The C runtime can initialize the following strings from the
+  ;; command line or the environment.  We need to decode these into
+  ;; the utf-16 strings that Lisp uses.
+  (setf lisp-command-line-list
+	(mapcar #'(lambda (s)
+		    (stream:string-decode s locale))
+		lisp-command-line-list))
+  (setf lisp-environment-list
+	(mapcar #'(lambda (s)
+		    (stream:string-decode s locale))
+		lisp-environment-list))
+  ;; This needs more work.  *cmucl-lib* could be set from the envvar
+  ;; "CMUCLLIB" or from the "-lib" command-line option, and thus
+  ;; should use the LOCALE to decode the string.
+  (when *cmucl-lib*
+    (setf *cmucl-lib*
+	  (stream:string-decode *cmucl-lib* file-locale)))
+  ;; This also needs more work since the core path could come from the
+  ;; "-core" command-line option and should thus use LOCALE to decode
+  ;; the string.  It could also come from the "CMUCLCORE" envvar.
+  (setf *cmucl-core-path*
+	(stream:string-decode *cmucl-core-path* file-locale))
+  ;; *unidata-path* defaults to a pathname object, but the user can
+  ;; specify a path, so we need to decode the string path if given.
+  (when (and *unidata-path* (stringp *unidata-path*))
+    (setf *unidata-path*
+	  (stream:string-decode *unidata-path* file-locale))))
+
 (defun save-lisp (core-file-name &key
 				 (purify t)
 				 (root-structures ())
@@ -278,12 +306,18 @@
 	     ;; Load external format aliases now so we can aliases to
 	     ;; specify the external format.
 	     (stream::load-external-format-aliases)
-	     ;; Set the locale for lisp
-	     (intl::setlocale)
 	     ;; Set up :locale format
 	     (set-up-locale-external-format)
-	     ;; Set terminal encodings to :locale
-	     (set-system-external-format :locale)
+	     ;; Set terminal encodings to :locale and filename encoding to :utf-8.
+	     ;; (This needs more work on Darwin.)
+	     (set-system-external-format :locale :utf-8)
+	     (decode-runtime-strings :locale :utf-8)
+	     ;; Need to reinitialize the environment again because
+	     ;; we've possibly changed the environment variables and
+	     ;; pathnames.
+	     (environment-init)
+	     ;; Set the locale for lisp
+	     (intl::setlocale)
 	     (ext::process-command-strings process-command-line)
 	     (setf *editor-lisp-p* nil)
 	     (macrolet ((find-switch (name)


=====================================
src/code/unix.lisp
=====================================
@@ -25,7 +25,12 @@
 ;; it must be set to :iso8859-1 (or left as NIL), making files with
 ;; non-Latin-1 characters "mojibake", but otherwise they'll be inaccessible.
 ;; Must be set to NIL initially to enable building Lisp!
-(defvar *filename-encoding* nil)
+(defvar *filename-encoding* nil
+  "The encoding to use for converting a namestring to a string that can
+  be used by the operating system.  It must be a valid
+  external-format name or NIL.  NIL means the string is passed as is
+  to the operating system.  The operating system will get the low 8
+  bits of each UTF-16 code unit of the string.")
 
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (defmacro %name->file (string)


=====================================
tests/issues.lisp
=====================================
@@ -258,6 +258,13 @@
 	(assert-equal (map 'list #'char-code out-string)
 		      (map 'list #'char-code expected))))))
 
+(define-test issue.25c-setup
+    (:tag :issues)
+  ;; Get the external format before running the test issue.25c.  See
+  ;; issue #161
+  ;; (https://gitlab.common-lisp.net/cmucl/cmucl/-/issues/161).
+  (assert-true (stream::find-external-format :utf16-be)))
+
 (define-test issue.25c
     (:tag :issues)
   ;; Modified test to verify that each octet read from run-program is
@@ -682,10 +689,7 @@
   ;; work and not return NIL.
   (assert-true (file-author "."))
   (assert-true (file-author "bin/build.sh"))
-  (let ((unix::*filename-encoding* :utf-8))
-    ;; Set filename encoding to utf-8 so that we can encode the
-    ;; filename properly.
-    (assert-true
+  (assert-true
    (file-author
     (merge-pathnames 
      (concatenate 'string
@@ -696,7 +700,7 @@
 		  '(#\Hangul_Syllable_An #\Hangul_Syllable_Nyeong #\Hangul_Syllable_Ha
 		    #\Hangul_Syllable_Sib #\Hangul_Syllable_Ni #\Hangul_Syllable_Gga)
 		  ".txt")
-     *test-path*)))))
+     *test-path*))))
 
 (define-test issue.139-default-external-format
     (:tag :issues)



View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6fc2e38e925ab9f3fcfb7e54ca059d26ae85af02...d004986e80238a6ddc6dc8796c7b2150670bc413

-- 
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6fc2e38e925ab9f3fcfb7e54ca059d26ae85af02...d004986e80238a6ddc6dc8796c7b2150670bc413
You're receiving this email because of your account on gitlab.common-lisp.net.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/cmucl-cvs/attachments/20230113/5d972624/attachment-0001.html>


More information about the cmucl-cvs mailing list