[Git][cmucl/cmucl][issue-141-locale] 13 commits: Fix #147: Add method for stream-line-column

Raymond Toy (@rtoy) gitlab at common-lisp.net
Sat Nov 5 02:13:33 UTC 2022



Raymond Toy pushed to branch issue-141-locale at cmucl / cmucl


Commits:
e8a0cc6c by Raymond Toy at 2022-10-30T15:03:27+00:00
Fix #147:  Add method for stream-line-column

- - - - -
0dad5a1a by Raymond Toy at 2022-10-30T15:03:28+00:00
Merge branch 'issue-147-stream-line-column-impl' into 'master'

Fix #147:  Add method for stream-line-column

Closes #147

See merge request cmucl/cmucl!104
- - - - -
1300830b by Raymond Toy at 2022-10-31T17:12:48+00:00
Address #139: *default-external-format* is :utf-8

- - - - -
649a4f1e by Raymond Toy at 2022-10-31T17:12:49+00:00
Merge branch 'issue-139-default-external-format-utf8' into 'master'

Address #139: *default-external-format* is :utf-8

See merge request cmucl/cmucl!103
- - - - -
88f6852f by Raymond Toy at 2022-11-01T12:04:55-07:00
Change :iso-8859-1 to :iso8859-1 in find-encoding

While there's an alias for `:iso-8859-1`, it's safer to use
`:iso8859-1` which is builtin.  Using `:iso-8859-1` requires the alias
database to be loaded, which isn't (currently) guaranteed when
`find-encoding` is called.  Thus use the builtin name instead.
Besides, `:iso8859-1` is used in other places in "intl.lisp".

(This is hard to test, but I noticed it when running
```
LANG=ko_KR.utf8 lisp
```
on the branch `issue-139-add-alias-local-external-format`.)

- - - - -
d5f1aa5e by Raymond Toy at 2022-11-01T20:35:49+00:00
Update release-21e.md with closed issues.
- - - - -
402c0c01 by Raymond Toy at 2022-11-02T01:00:20+00:00
Fix #150: add aliases cp949 euckr

- - - - -
d825aa54 by Raymond Toy at 2022-11-02T01:00:20+00:00
Merge branch 'issue-150-add-aliases-cp949-euckr' into 'master'

Fix #150: add aliases cp949 euckr

Closes #150

See merge request cmucl/cmucl!106
- - - - -
33c760fa by Raymond Toy at 2022-11-03T04:47:09+00:00
Fix #149:  Call setlocale(3C) on startup

- - - - -
317a33f8 by Raymond Toy at 2022-11-03T04:47:10+00:00
Merge branch 'issue-149-add-setlocale' into 'master'

Fix #149:  Call setlocale(3C) on startup

Closes #149

See merge request cmucl/cmucl!105
- - - - -
390f8f3f by Raymond Toy at 2022-11-04T18:14:01-07:00
Update release notes

- - - - -
2c5282bf by Raymond Toy at 2022-11-04T18:48:48-07:00
Merge branch 'master' into issue-141-locale

- - - - -
1dbc1061 by Raymond Toy at 2022-11-04T19:13:13-07:00
Implement unix-getlocale and use it

* lisp/os-common.c
  * Implement os_getlocale to get the current locale via setlocale(3C)
* code/unix.lisp
  * Define function unix-getlocale to call os_getlocale
* code/intl.lisp
  * Use unix-getlocale to get the locale instead of getting the
    different environment variables.
* i18n/locale/cmucl-unix.pot
  * Update because of the new docstring

- - - - -


12 changed files:

- src/code/extfmts.lisp
- src/code/intl.lisp
- src/code/save.lisp
- src/code/unix.lisp
- src/general-info/release-21e.md
- src/i18n/locale/cmucl-unix.pot
- src/lisp/os-common.c
- src/pcl/gray-streams.lisp
- src/pcl/simple-streams/external-formats/aliases
- + tests/.gitignore
- tests/issues.lisp
- + tests/utf8.txt


Changes:

=====================================
src/code/extfmts.lisp
=====================================
@@ -22,7 +22,7 @@
 	  describe-external-format))
 
 (defvar *default-external-format*
-  :iso8859-1
+  :utf-8
   "The default external format to use if no other external format is
   specified")
 


=====================================
src/code/intl.lisp
=====================================
@@ -105,7 +105,7 @@
 
 (defun find-encoding (domain)
   (when (null (domain-entry-encoding domain))
-    (setf (domain-entry-encoding domain) :iso-8859-1)
+    (setf (domain-entry-encoding domain) :iso8859-1)
     ;; Domain lookup can call the compiler, so set the locale to "C"
     ;; so things work.
     (let* ((*locale* "C")
@@ -519,18 +519,8 @@
     (if (equal val "") nil val)))
 
 (defun setlocale (&optional locale)
-  (let ((env-locale (or locale
-			(getenv "LANGUAGE")
-			(getenv "LC_ALL")
-			(getenv "LC_MESSAGES")
-			(getenv "LANG"))))
-    (cond
-      ((and (plusp (length env-locale))
-	    (char-equal #\/ (aref env-locale 0)))
-       (warn "Locale not changed due to unsupported locale: ~S" env-locale))
-      (t
-       (setf *locale* (or env-locale
-			  *locale*))))))
+  (setf *locale* (or (unix::unix-getlocale)
+		     *locale*)))
 
 (defmacro textdomain (domain)
   `(eval-when (:compile-toplevel :execute)


=====================================
src/code/save.lisp
=====================================
@@ -249,6 +249,10 @@
 	     (reinit)
 	     (environment-init)
 	     (dolist (f *after-save-initializations*) (funcall f))
+	     ;; Set the runtime locale
+	     (unless (zerop (unix::unix-setlocale))
+	       (warn "os_setlocale failed"))
+	     ;; Set the locale for lisp
 	     (intl::setlocale)
 	     (ext::process-command-strings process-command-line)
 	     (setf *editor-lisp-p* nil)


=====================================
src/code/unix.lisp
=====================================
@@ -2893,3 +2893,25 @@
    of the child in the parent if it works, or NIL and an error number if it
    doesn't work."
   (int-syscall ("fork")))
+
+(defun unix-setlocale ()
+  _N"Call setlocale(3c) with fixed args.  Returns 0 on success."
+  (alien:alien-funcall
+   (alien:extern-alien "os_setlocale"
+		       (function c-call:int))))
+
+(defun unix-getlocale ()
+  _N"Get the current locale.  If we can't, return NIL.  A call to
+  UNIX-SETLOCALE must have been done previously before calling this so
+  that the correct locale is returned."
+  (with-alien ((buf (array c-call:char 256)))
+    (let ((result
+	    (alien-funcall
+	     (extern-alien "os_getlocale"
+			   (function c-call:int
+				     (* c-call:char)
+				     c-call:int))
+	     (cast buf (* c-call:char))
+	     256)))
+      (when (zerop result)
+	(cast buf c-call:c-string)))))


=====================================
src/general-info/release-21e.md
=====================================
@@ -22,6 +22,7 @@ public domain.
   * Feature enhancements
   * Changes
     * Update to ASDF 3.3.6
+    * The default external format is `:utf-8` instead of `:iso8859-1`
   * ANSI compliance fixes:
   * Bug fixes:
     * ~~#97~~ Fixes stepping through the source forms in the debugger.  This has been broken for quite some time, but it works now.
@@ -50,13 +51,19 @@ public domain.
     * ~~#113~~ REQUIRE on contribs can pull in the wrong things via ASDF..
     * ~~#121~~ Wrong column index in FILL-POINTER-OUTPUT-STREAM
     * ~~#122~~ gcc 11 can't build cmucl
+    * ~~#124~~ directory with `:wild-inferiors` doesn't descend subdirectories 
     * ~~#125~~ Linux `unix-stat` returning incorrect values
     * ~~#127~~ Linux unix-getpwuid segfaults when given non-existent uid..
     * ~~#128~~ `QUIT` accepts an exit code
+    * ~~#130~~ Move file-author to C 
     * ~~#132~~ Ansi test `RENAME-FILE.1` no fails
     * ~~#134~~ Handle the case of `(expt complex complex-rational)`
     * ~~#136~~ `ensure-directories-exist` should return the given pathspec
+    * #139 `*default-external-format*` defaults to `:utf-8`
+    * ~~#141~~ Disallow locales that are pathnames to a localedef file
     * ~~#142~~ `(random 0)` signals incorrect error
+    * ~~#147~~ `stream-line-column` method missing for `fundamental-character-output-stream`
+    * ~~#149~~ Call setlocale(3C) on startup
   * Other changes:
   * Improvements to the PCL implementation of CLOS:
   * Changes to building procedure:


=====================================
src/i18n/locale/cmucl-unix.pot
=====================================
@@ -1424,3 +1424,14 @@ msgid ""
 "   doesn't work."
 msgstr ""
 
+#: src/code/unix.lisp
+msgid "Call setlocale(3c) with fixed args.  Returns 0 on success."
+msgstr ""
+
+#: src/code/unix.lisp
+msgid ""
+"Get the current locale.  If we can't, return NIL.  A call to\n"
+"  UNIX-SETLOCALE must have been done previously before calling this so\n"
+"  that the correct locale is returned."
+msgstr ""
+


=====================================
src/lisp/os-common.c
=====================================
@@ -7,6 +7,7 @@
 
 #include <assert.h>
 #include <errno.h>
+#include <locale.h>
 #include <math.h>
 #include <netdb.h>
 #include <pwd.h>
@@ -773,3 +774,25 @@ exit:
     
     return result;
 }
+
+int
+os_setlocale(void)
+{
+    char *result = setlocale(LC_ALL, "");
+
+    /* Return 0 if setlocale succeeded; otherwise -1. */
+    return result != NULL ? 0 : -1;
+}
+
+int
+os_getlocale(char *buf, int len)
+{
+    char *locale = setlocale(LC_ALL, NULL);
+    if (locale) {
+        strncpy(buf, locale, len - 1);
+        buf[len - 1] = '\0';
+    }
+
+    /* Return -1 if setlocale failed. */
+    return locale ? 0 : -1;
+}


=====================================
src/pcl/gray-streams.lisp
=====================================
@@ -235,6 +235,9 @@
   defined for this function, although it is permissible for it to
   always return NIL."))
 
+(defmethod stream-line-column ((stream fundamental-character-output-stream))
+  nil)
+
 ;;; Stream-line-length is a CMUCL extension to Gray streams.
 (defgeneric stream-line-length (stream)
   (:documentation _N"Return the stream line length or Nil."))


=====================================
src/pcl/simple-streams/external-formats/aliases
=====================================
@@ -223,6 +223,8 @@ windows-cp1252	cp1252
 windows-latin1	cp1252
 ms-ansi		cp1252
 
+euckr		euc-kr
+cp949		euc-kr
 ;; These are not yet implemented
 ;;iso-2022-jp	iso2022-jp
 ;;iso2022jp	iso2022-jp


=====================================
tests/.gitignore
=====================================
@@ -0,0 +1 @@
+/out-utf8.txt


=====================================
tests/issues.lisp
=====================================
@@ -5,6 +5,12 @@
 
 (in-package "ISSUES-TESTS")
 
+(defparameter *test-path*
+  (merge-pathnames (make-pathname :name :unspecific :type :unspecific
+                                  :version :unspecific)
+                   *load-truename*)
+  "Path to where this file is.")
+
 (defun square (x)
   (expt x 2))
 
@@ -676,4 +682,73 @@
   ;; work and not return NIL.
   (assert-true (file-author "."))
   (assert-true (file-author "bin/build.sh"))
-  (assert-true (file-author "tests/안녕하십니까.txt")))
+  (let ((unix::*filename-encoding* :utf-8))
+    ;; Set filename encoding to utf-8 so that we can encode the
+    ;; filename properly.
+    (assert-true
+   (file-author
+    (merge-pathnames 
+     (concatenate 'string
+		  ;; Write the test file name this way so
+		  ;; that it's independent of the encoding
+		  ;; used to load this file.  The name is
+		  ;; "안녕하십니까".
+		  '(#\Hangul_Syllable_An #\Hangul_Syllable_Nyeong #\Hangul_Syllable_Ha
+		    #\Hangul_Syllable_Sib #\Hangul_Syllable_Ni #\Hangul_Syllable_Gga)
+		  ".txt")
+     *test-path*)))))
+
+(define-test issue.139-default-external-format
+    (:tag :issues)
+  (assert-eq :utf-8 stream:*default-external-format*))
+
+(define-test issue.139-default-external-format-read-file
+    (:tag :issues)
+  (let ((string (concatenate 'string
+			     ;; This is "hello" in Korean
+			     '(#\Hangul_syllable_an
+			       #\Hangul_Syllable_Nyeong
+			       #\Hangul_Syllable_Ha
+			       #\Hangul_Syllable_Se
+			       #\Hangul_Syllable_Yo))))
+    ;; Test that opening a file for reading uses the default :utf8
+    ;; encoding.
+    (with-open-file (s (merge-pathnames "utf8.txt"
+					*test-path*)
+		       :direction :input)
+      ;; The first line should be "hello" in Hangul.
+      (assert-equal (map 'list #'char-name string)
+		    (map 'list #'char-name (read-line s))))))
+
+(define-test issue.139-default-external-format-write-file
+    (:tag :issues)
+  ;; Test that opening a file for writing uses the default :utf8.
+  ;; First write something out to the file.  Then read it back in
+  ;; using an explicit format of utf8 and verifying that we got the
+  ;; right contents.
+  (let ((string (concatenate 'string
+			     ;; This is "hello" in Korean
+			     '(#\Hangul_syllable_an
+			       #\Hangul_Syllable_Nyeong
+			       #\Hangul_Syllable_Ha
+			       #\Hangul_Syllable_Se
+			       #\Hangul_Syllable_Yo))))
+    (with-open-file (s (merge-pathnames "out-utf8.txt"
+					*test-path*)
+		       :direction :output
+		       :if-exists :supersede)
+      (write-line string s))
+    (with-open-file (s (merge-pathnames "out-utf8.txt"
+					*test-path*)
+		       :direction :input
+		       :external-format :utf-8)
+      (assert-equal (map 'list #'char-name string)
+		    (map 'list #'char-name (read-line s))))))
+  
+
+(define-test issue.150
+    (:tag :issues)
+  (let ((ext:*gc-verbose* nil)
+	(*compile-print* nil))
+    (assert-true (stream::find-external-format :euckr))
+    (assert-true (stream::find-external-format :cp949))))


=====================================
tests/utf8.txt
=====================================
@@ -0,0 +1,2 @@
+안녕하세요
+UTF8 test.  The above line is "Hello" in Hangul.



View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6e975c79c794bb61d18fa0bafdf04cdca674a317...1dbc106133d6291bc77f08471d27f271b76bda1e

-- 
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6e975c79c794bb61d18fa0bafdf04cdca674a317...1dbc106133d6291bc77f08471d27f271b76bda1e
You're receiving this email because of your account on gitlab.common-lisp.net.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/cmucl-cvs/attachments/20221105/4c022a3f/attachment-0001.html>


More information about the cmucl-cvs mailing list