[Git][cmucl/cmucl][issue-139-add-alias-local-external-format] 3 commits: Change :iso-8859-1 to :iso8859-1 in find-encoding

Raymond Toy (@rtoy) gitlab at common-lisp.net
Tue Nov 1 19:39:04 UTC 2022



Raymond Toy pushed to branch issue-139-add-alias-local-external-format at cmucl / cmucl


Commits:
88f6852f by Raymond Toy at 2022-11-01T12:04:55-07:00
Change :iso-8859-1 to :iso8859-1 in find-encoding

While there's an alias for `:iso-8859-1`, it's safer to use
`:iso8859-1` which is builtin.  Using `:iso-8859-1` requires the alias
database to be loaded, which isn't (currently) guaranteed when
`find-encoding` is called.  Thus use the builtin name instead.
Besides, `:iso8859-1` is used in other places in "intl.lisp".

(This is hard to test, but I noticed it when running
```
LANG=ko_KR.utf8 lisp
```
on the branch `issue-139-add-alias-local-external-format`.)

- - - - -
35b1282d by Raymond Toy at 2022-11-01T12:08:51-07:00
Merge branch 'master' into issue-139-add-alias-local-external-format

- - - - -
7b17a82e by Raymond Toy at 2022-11-01T12:38:48-07:00
Add interface unix-get-locale-codeset

* src/lisp/os-common.c
  * Add the function `os_get_locale_codeset` to get the codeset for our
    locale.
    
* src/code/unix.lisp
  * Add alien interface to `os_get_locale_codeset`, named `unix-get-locale-codeset`.

* src/code/save.lisp
  * Use `unix-get-locale-codeset` to figure out how to set up an alias
    for the external format named `:locale`.  Set this up in the
    initial function when we save lisp.

- - - - -


4 changed files:

- src/code/intl.lisp
- src/code/save.lisp
- src/code/unix.lisp
- src/lisp/os-common.c


Changes:

=====================================
src/code/intl.lisp
=====================================
@@ -105,7 +105,7 @@
 
 (defun find-encoding (domain)
   (when (null (domain-entry-encoding domain))
-    (setf (domain-entry-encoding domain) :iso-8859-1)
+    (setf (domain-entry-encoding domain) :iso8859-1)
     ;; Domain lookup can call the compiler, so set the locale to "C"
     ;; so things work.
     (let* ((*locale* "C")


=====================================
src/code/save.lisp
=====================================
@@ -145,53 +145,25 @@
 (defun set-up-locale-external-format ()
   "Add external format alias for :locale to the format specified by
   the envvar LANG and friends if available."
-  ;; Find the envvar that will tell us what encoding to use.
-  ;;
-  ;; See https://pubs.opengroup.org/onlinepubs/7908799/xbd/envvar.html
-  ;;
-  (let* ((lang (or (unix:unix-getenv "LC_ALL")
-                   (unix:unix-getenv "LC_MESSAGES")
-                   (unix:unix-getenv "LANG")))
-         (length (length lang)))
-    ;; If LANG isn't set, just set :locale to alias to the
-    ;; default-external-format.
-    (unless lang
-      (setf (gethash :locale stream::*external-format-aliases*) *default-external-format*)
-      (return-from set-up-locale-external-format (values)))
-    ;; Extract the external format from the envvar and set up the
-    ;; :locale alias.
-    (let ((new-alias
-	    (cond
-	      ((or (string-equal "C" lang :end2 (min 1 length))
-		   (string-equal "POSIX" lang :end2 (min 5 length)))
-	       ;; If the lang is "C" or "POSIX", ignoring anything after
-	       ;; that, default to :iso8859-1.
-	       :iso8859-1)
-	      ((string-equal "/" lang :end2 (min 1 length))
-	       ;; Also, we don't handle the case where the locale starts
-	       ;; with a slash which means a pathname to a file created by
-	       ;; the localedef utility.  So use our defaults for that case
-	       ;; as well.
-	       :iso8859-1)
-	      (t
-	       ;; Simple parsing of LANG.  We assume it looks like
-	       ;; "language[_territory][.codeset]".  We're only interested
-	       ;; in the codeset, if given.  Some LC_ vars also have an
-	       ;; optional @modifier after the codeset; we ignore that too.
-	       (let ((dot (position #\. lang))
-		     (at (or (position #\@ lang) nil)))
-		 (when dot
-		   (let* ((codeset (subseq lang (1+ dot) at))
-			  (format (intern codeset "KEYWORD")))
-		     (cond ((stream::find-external-format format nil)
-			    format)
-			   (t
-			    (warn "Unknown or unsupported external format: ~S"
-				  codeset)
-			    *default-external-format*)))))))))
-      (assert new-alias)
-      (setf (gethash :locale stream::*external-format-aliases*) new-alias))
-    (values)))
+  (let ((codeset (unix::unix-get-locale-codeset)))
+    (cond ((zerop (length codeset))
+	   ;; Codeset was the empty string, so just set :locale to
+	   ;; alias to the default external format.  
+	   (setf (gethash :locale stream::*external-format-aliases*)
+		 *default-external-format*))
+	  (t
+	   (let ((codeset-format (intern codeset "KEYWORD")))
+	     ;; If we know the format, we can set the alias.
+	     ;; Otherwise, print a warning and use :iso8859-1 as the
+	     ;; alias.
+	     (setf (gethash :locale stream::*external-format-aliases*)
+		   (if (stream::find-external-format codeset-format nil)
+		       codeset-format
+		       (progn
+			 (warn "Unsupported external format; using :iso8859-1 instead: ~S"
+			       codeset-format)
+			 :iso8859-1)))))))
+  (values))
 
  
 (defun save-lisp (core-file-name &key
@@ -301,6 +273,7 @@
 	     (reinit)
 	     (environment-init)
 	     (dolist (f *after-save-initializations*) (funcall f))
+	     (stream::load-external-format-aliases)
 	     (intl::setlocale)
 	     (ext::process-command-strings process-command-line)
 	     (setf *editor-lisp-p* nil)


=====================================
src/code/unix.lisp
=====================================
@@ -2893,3 +2893,13 @@
    of the child in the parent if it works, or NIL and an error number if it
    doesn't work."
   (int-syscall ("fork")))
+
+(defun unix-get-locale-codeset ()
+  _N"Get the codeset from the locale"
+  (with-alien ((codeset (array c-call:char 512)))
+    (alien-funcall
+	    (extern-alien "os_get_locale_codeset"
+			  (function void (* char) int))
+	    (cast codeset (* c-call:char))
+	    512)
+    (cast codeset c-string)))


=====================================
src/lisp/os-common.c
=====================================
@@ -7,6 +7,8 @@
 
 #include <assert.h>
 #include <errno.h>
+#include <langinfo.h>
+#include <locale.h>
 #include <math.h>
 #include <netdb.h>
 #include <pwd.h>
@@ -773,3 +775,15 @@ exit:
     
     return result;
 }
+
+void
+os_get_locale_codeset(char* codeset, int len)
+{
+    char *code;
+    
+    setlocale(LC_ALL, "");
+
+    code = nl_langinfo(CODESET);
+
+    strncpy(codeset, code, len);
+}



View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/69f2a9909142a2e6d5b91db6853c956f6d16cb5b...7b17a82e5dbf2f33dd30cf28c6de13ebb3fbc370

-- 
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/69f2a9909142a2e6d5b91db6853c956f6d16cb5b...7b17a82e5dbf2f33dd30cf28c6de13ebb3fbc370
You're receiving this email because of your account on gitlab.common-lisp.net.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/cmucl-cvs/attachments/20221101/d9deda5f/attachment-0001.html>


More information about the cmucl-cvs mailing list