[Git][cmucl/cmucl][issue-158-darwin-pathnames] 8 commits: Address #139: Set filename encoding to :utf-8

Raymond Toy (@rtoy) gitlab at common-lisp.net
Wed Feb 15 21:51:59 UTC 2023



Raymond Toy pushed to branch issue-158-darwin-pathnames at cmucl / cmucl


Commits:
dbdec3a5 by Raymond Toy at 2023-01-13T23:33:47+00:00
Address #139: Set filename encoding to :utf-8

- - - - -
d004986e by Raymond Toy at 2023-01-13T23:33:49+00:00
Merge branch 'issue-139-set-filename-encoding-to-utf8' into 'master'

Address #139: Set filename encoding to :utf-8

See merge request cmucl/cmucl!109
- - - - -
d01f2cf9 by Raymond Toy at 2023-01-18T08:00:32-08:00
Fix #162:  Change *filename-encoding* to use :no-encoding

Instead of using `NIL` as the value of `*filename-encoding*` to indicate
that no filename encoding is to be done, use `:no-encoding`.  This makes
it a bit clearer what `*filename-encoding*` means.

- - - - -
7c44d848 by Raymond Toy at 2023-01-19T15:30:06-08:00
Use :null instead of :no-encoding for no filename encoding

The advantage of using `:null` is that it's a recognized external
format (that aliases to `:void`).  So if we inadvertently use `:null`
as a filename encoding somewhere unexpected, it will cause an
error (because the `:void` encoding does).

- - - - -
ce202074 by Raymond Toy at 2023-02-10T08:29:32-08:00
Fix stupid typos

Really stupid typos/thinkos:
* Forgot to change initial value of `*filename-encoding*` from
  `:no-encoding` to `:null`.  (Stupid!)
* Fix typo: `:nul` instead of `:null` in `%name->file`.

Update cmucl-unix.pot too for the change in the docstring for
`*filename-encoding*`.

- - - - -
9eb801f6 by Raymond Toy at 2023-02-15T13:01:43-08:00
Disable issue.41.1 when running CI

This test was previously disabled only for Linux when running the CI.
However, it's now also failing when running the CI for Darwin.  Thus
disable it whenever we're running the CI.

I just manually tested this on my Linux and Mac boxes.  This test
passes without any problem.  Not sure what's going on.

- - - - -
4be1d90c by Raymond Toy at 2023-02-15T21:23:15+00:00
Merge branch 'issue-162-filename-encoding-no-encoding' into 'master'

Fix #162:  Change *filename-encoding* to use :null

Closes #162

See merge request cmucl/cmucl!111
- - - - -
e5a4b66d by Raymond Toy at 2023-02-15T13:51:37-08:00
Merge branch 'master' into issue-158-darwin-pathnames

- - - - -


6 changed files:

- src/code/extfmts.lisp
- src/code/lispinit.lisp
- src/code/save.lisp
- src/code/unix.lisp
- src/i18n/locale/cmucl-unix.pot
- tests/issues.lisp


Changes:

=====================================
src/code/extfmts.lisp
=====================================
@@ -370,8 +370,10 @@
 		    #() '())))))
 
 (defun load-external-format-aliases ()
+  ;; Set filename encoding to :NULL to bypass any encoding; it's not
+  ;; needed to open the aliases file.  :NULL means the pathname string is passed as is, where only the low 8 bits of each UTF-16 code unit are given to the operating system.
   (let ((*package* (find-package "KEYWORD"))
-	(unix::*filename-encoding* :iso8859-1))
+	(unix::*filename-encoding* :null))
     (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil
 			 :external-format :iso8859-1)
       (when stm
@@ -486,11 +488,16 @@
       (and (consp name) (find-external-format name))
       (and (with-standard-io-syntax
 	     ;; Use standard IO syntax so that changes by the user
-	     ;; don't mess up compiling the external format.
-	     (let ((*package* (find-package "STREAM"))
-		   (lisp::*enable-package-locked-errors* nil)
-		   (s (open (format nil "ext-formats:~(~A~).lisp" name)
-			    :if-does-not-exist nil :external-format :iso8859-1)))
+	     ;; don't mess up compiling the external format, but we
+	     ;; don't need to print readably.  Also, set filename
+	     ;; encoding to :NULL because we don't need any special
+	     ;; encoding to open the format files.
+	     (let* ((*print-readably* nil)
+		    (unix::*filename-encoding* :null)
+		    (*package* (find-package "STREAM"))
+		    (lisp::*enable-package-locked-errors* nil)
+		    (s (open (format nil "ext-formats:~(~A~).lisp" name)
+			     :if-does-not-exist nil :external-format :iso8859-1)))
 	       (when s
 		 (null (nth-value 1 (ext:compile-from-stream s))))))
            (gethash name *external-formats*))))
@@ -1150,7 +1157,7 @@ character and illegal outputs are replaced by a question mark.")
     (unless (find-external-format filenames)
       (error (intl:gettext "Can't find external-format ~S.") filenames))
     (setq filenames (ef-name (find-external-format filenames)))
-    (when (and unix::*filename-encoding*
+    (when (and (not (eq unix::*filename-encoding* :null))
 	       (not (eq unix::*filename-encoding* filenames)))
       (cerror (intl:gettext "Change it anyway.")
 	      (intl:gettext "The external-format for encoding filenames is already set.")))


=====================================
src/code/lispinit.lisp
=====================================
@@ -344,7 +344,7 @@
   #-gengc (setf unix::*interrupt-pending* nil)
   (setf *type-system-initialized* nil)
   (setf *break-on-signals* nil)
-  (setf unix::*filename-encoding* nil)
+  (setf unix::*filename-encoding* :null)
   (setf *enable-darwin-path-normalization* nil)
   #+gengc (setf conditions::*handler-clusters* nil)
   (setq intl::*default-domain* "cmucl")


=====================================
src/code/save.lisp
=====================================
@@ -164,7 +164,35 @@
 		 *default-external-format*))))
   (values))
 
- 
+(defun decode-runtime-strings (locale file-locale)
+  ;; The C runtime can initialize the following strings from the
+  ;; command line or the environment.  We need to decode these into
+  ;; the utf-16 strings that Lisp uses.
+  (setf lisp-command-line-list
+	(mapcar #'(lambda (s)
+		    (stream:string-decode s locale))
+		lisp-command-line-list))
+  (setf lisp-environment-list
+	(mapcar #'(lambda (s)
+		    (stream:string-decode s locale))
+		lisp-environment-list))
+  ;; This needs more work.  *cmucl-lib* could be set from the envvar
+  ;; "CMUCLLIB" or from the "-lib" command-line option, and thus
+  ;; should use the LOCALE to decode the string.
+  (when *cmucl-lib*
+    (setf *cmucl-lib*
+	  (stream:string-decode *cmucl-lib* file-locale)))
+  ;; This also needs more work since the core path could come from the
+  ;; "-core" command-line option and should thus use LOCALE to decode
+  ;; the string.  It could also come from the "CMUCLCORE" envvar.
+  (setf *cmucl-core-path*
+	(stream:string-decode *cmucl-core-path* file-locale))
+  ;; *unidata-path* defaults to a pathname object, but the user can
+  ;; specify a path, so we need to decode the string path if given.
+  (when (and *unidata-path* (stringp *unidata-path*))
+    (setf *unidata-path*
+	  (stream:string-decode *unidata-path* file-locale))))
+
 (defun save-lisp (core-file-name &key
 				 (purify t)
 				 (root-structures ())
@@ -278,10 +306,9 @@
 	     ;; Load external format aliases now so we can use aliases to
 	     ;; specify the external format.
 	     (stream::load-external-format-aliases)
-	     ;; Set the locale for lisp
-	     (intl::setlocale)
 	     ;; Set up :locale format
 	     (set-up-locale-external-format)
+<<<<<<< HEAD
 	     ;; Set terminal encodings to :locale
 	     (set-system-external-format :locale)
 	     #+darwin
@@ -295,6 +322,18 @@
 	       (lisp::load-decomp)
 	       (lisp::load-combining)
 	       (setf *enable-darwin-path-normalization* t))
+=======
+	     ;; Set terminal encodings to :locale and filename encoding to :utf-8.
+	     ;; (This needs more work on Darwin.)
+	     (set-system-external-format :locale :utf-8)
+	     (decode-runtime-strings :locale :utf-8)
+	     ;; Need to reinitialize the environment again because
+	     ;; we've possibly changed the environment variables and
+	     ;; pathnames.
+	     (environment-init)
+	     ;; Set the locale for lisp
+	     (intl::setlocale)
+>>>>>>> master
 	     (ext::process-command-strings process-command-line)
 	     (setf *editor-lisp-p* nil)
 	     (macrolet ((find-switch (name)


=====================================
src/code/unix.lisp
=====================================
@@ -25,17 +25,22 @@
 ;; it must be set to :iso8859-1 (or left as :NULL), making files with
 ;; non-Latin-1 characters "mojibake", but otherwise they'll be inaccessible.
 ;; Must be set to :NULL initially to enable building Lisp!
-(defvar *filename-encoding* nil)
+(defvar *filename-encoding* :null
+  "The encoding to use for converting a namestring to a string that can
+  be used by the operations system.  It must be a valid
+  external-format name or :NULL.  :NULL means the string
+  is passed as is to the operating system.  The operating system will
+  get the low 8 bits of each UTF-16 code unit of the string.")
 
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (defmacro %name->file (string)
-    `(if *filename-encoding*
-	 (string-encode ,string *filename-encoding*)
-	 ,string))
+    `(if (eql *filename-encoding* :null)
+	 ,string
+	 (string-encode ,string *filename-encoding*)))
   (defmacro %file->name (string)
-    `(if *filename-encoding*
-	 (string-decode ,string *filename-encoding*)
-	 ,string)))
+    `(if (eql *filename-encoding* :null)
+	 ,string
+	 (string-decode ,string *filename-encoding*))))
 
 
 ;;;; Common machine independent structures.


=====================================
src/i18n/locale/cmucl-unix.pot
=====================================
@@ -15,6 +15,15 @@ msgstr ""
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
 
+#: src/code/unix.lisp
+msgid ""
+"The encoding to use for converting a namestring to a string that can\n"
+"  be used by the operations system.  It must be a valid\n"
+"  external-format name or :NULL.  :NULL means the string\n"
+"  is passed as is to the operating system.  The operating system will\n"
+"  get the low 8 bits of each UTF-16 code unit of the string."
+msgstr ""
+
 #: src/code/unix.lisp
 msgid "Syscall ~A failed: ~A"
 msgstr ""


=====================================
tests/issues.lisp
=====================================
@@ -258,6 +258,13 @@
 	(assert-equal (map 'list #'char-code out-string)
 		      (map 'list #'char-code expected))))))
 
+(define-test issue.25c-setup
+    (:tag :issues)
+  ;; Get the external format before running the test issue.25c.  See
+  ;; issue #161
+  ;; (https://gitlab.common-lisp.net/cmucl/cmucl/-/issues/161).
+  (assert-true (stream::find-external-format :utf16-be)))
+
 (define-test issue.25c
     (:tag :issues)
   ;; Modified test to verify that each octet read from run-program is
@@ -409,9 +416,12 @@
 ;; running a pipeline with linux, but otherwise enable it.  The
 ;; pipeline defines the envvar GITLAB_CI so check for that.
 ;;
+;; This also fails on Darwin CI now.  Let's just disable the test if
+;; running on CI.
+;;
 ;; It would be better if lisp-unit had a way of marking tests as known
 ;; failures, but it doesn't.
-#+#.(cl:if (cl:and (ext:featurep :linux) (unix:unix-getenv "GITLAB_CI")) '(or) '(and))
+#+#.(cl:if (cl:and (unix:unix-getenv "GITLAB_CI")) '(or) '(and))
 (define-test issue.41.1
     (:tag :issues)
   (issue-41-tester unix:sigstop))
@@ -682,10 +692,7 @@
   ;; work and not return NIL.
   (assert-true (file-author "."))
   (assert-true (file-author "bin/build.sh"))
-  (let ((unix::*filename-encoding* :utf-8))
-    ;; Set filename encoding to utf-8 so that we can encode the
-    ;; filename properly.
-    (assert-true
+  (assert-true
    (file-author
     (merge-pathnames 
      (concatenate 'string
@@ -696,7 +703,7 @@
 		  '(#\Hangul_Syllable_An #\Hangul_Syllable_Nyeong #\Hangul_Syllable_Ha
 		    #\Hangul_Syllable_Sib #\Hangul_Syllable_Ni #\Hangul_Syllable_Gga)
 		  ".txt")
-     *test-path*)))))
+     *test-path*))))
 
 (define-test issue.139-default-external-format
     (:tag :issues)



View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/08496138d12cbf3f2b839d44003f2042401f4600...e5a4b66d871395517b7c29bbf580c514da871f4e

-- 
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/08496138d12cbf3f2b839d44003f2042401f4600...e5a4b66d871395517b7c29bbf580c514da871f4e
You're receiving this email because of your account on gitlab.common-lisp.net.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/cmucl-cvs/attachments/20230215/71c36556/attachment-0001.html>


More information about the cmucl-cvs mailing list