[Git][cmucl/cmucl][issue-158-darwin-pathnames] Fix bootstrap issues in pathname normalization for Darwin

Raymond Toy (@rtoy) gitlab at common-lisp.net
Wed Dec 21 17:40:16 UTC 2022



Raymond Toy pushed to branch issue-158-darwin-pathnames at cmucl / cmucl


Commits:
d7a1099d by Raymond Toy at 2022-12-21T09:35:37-08:00
Fix bootstrap issues in pathname normalization for Darwin

We create a pathname in `filesys-init` during loading of the
kernel.core.  Unicode isn't ready at this time, so we can't do
pathname normalization yet.

Add `*enable-normalization*` to control this.  Defaults to `NIL` and
gets set to `T` in the initial-function.

This also requires that we load up the Unicode decomp and combining
tables before setting `*enable-normalization*` to `T`.

For testing, this is enabled on Linux where I do my development.
We'll have to reorder this when this is working.

- - - - -


4 changed files:

- src/code/lispinit.lisp
- src/code/pathname.lisp
- src/code/save.lisp
- src/code/unidata.lisp


Changes:

=====================================
src/code/lispinit.lisp
=====================================
@@ -308,7 +308,8 @@
 ;;; in Unwind-Protects will get executed.
 
 (declaim (special *lisp-initialization-functions*
-		  *load-time-values*))
+		  *load-time-values*
+		  *enable-normalization*))
 
 (eval-when (compile)
   (defmacro print-and-call (name)
@@ -344,6 +345,7 @@
   (setf *type-system-initialized* nil)
   (setf *break-on-signals* nil)
   (setf unix::*filename-encoding* nil)
+  (setf *enable-normalization* nil)
   #+gengc (setf conditions::*handler-clusters* nil)
   (setq intl::*default-domain* "cmucl")
   (setq intl::*locale* "C")


=====================================
src/code/pathname.lisp
=====================================
@@ -252,6 +252,24 @@
 ;;; This constructor is used to make an instance of the correct type
 ;;; from parsed arguments.
 
+(defvar *enable-normalization* nil)
+
+(defun normalize-name (piece)
+  ;; Normalize Darwin pathnames by converting Hangul
+  ;; syllables to conjoining jamo, and converting the
+  ;; string to NFD form, but skipping over a range of
+  ;; characters.
+  (typecase piece
+    (string
+     (if *enable-normalization*
+	 (decompose (unicode::decompose-hangul piece)
+		    :compatibility nil
+		    :darwinp t)
+	 piece))
+    (t
+     ;; What should we do about lisp::pattern objects?
+     piece)))
+  
 (defun %make-pathname-object (host device directory name type version)
   (if (typep host 'logical-host)
       (flet ((upcasify (thing)
@@ -271,24 +289,16 @@
 				(upcasify name)
 				(upcasify type)
 				(upcasify version)))
-      #-darwin
+      #-(not nil)
       (%make-pathname host device directory name type version)
-      #+darwin
-      (flet ((normalize-name (string)
-	       ;; Normalize Darwin pathnames by converting Hangul
-	       ;; syllables to conjoining jamo, and converting the
-	       ;; string to NFD form, but skipping over a range of
-	       ;; characters.
-	       (decompose (with-output-to-string (s)
-			    (unicode::decompose-hangul string s))
-			  :compatibility nil
-			  :darwinp t)))
-	(%make-pathname host device
-			(list (car directory)
-			      (mapcar #'normalize-name (cdr directory)))
-			(normalize-name name)
-			(normalize-name type)
-			version))))
+      #+(not nil)
+      (%make-pathname host device
+		      (when directory
+			(list* (car directory)
+			       (mapcar #'normalize-name (cdr directory))))
+		      (normalize-name name)
+		      (normalize-name type)
+		      version))))
 
 ;;; *LOGICAL-HOSTS* --internal.
 ;;;


=====================================
src/code/save.lisp
=====================================
@@ -284,6 +284,16 @@
 	     (set-up-locale-external-format)
 	     ;; Set terminal encodings to :locale
 	     (set-system-external-format :locale)
+	     ;; Get some unicode stuff needed for decomposing strings.
+	     ;; This is needed on Darwin to normalize pathname
+	     ;; objects, which needs this information.  If we don't,
+	     ;; we'll load the information at runtime when creating
+	     ;; the path to "unidata.dat", which then calls decompose
+	     ;; again, and so on.
+	     (progn
+	       (lisp::load-decomp)
+	       (lisp::load-combining)
+	       (setf *enable-normalization* t))
 	     (ext::process-command-strings process-command-line)
 	     (setf *editor-lisp-p* nil)
 	     (macrolet ((find-switch (name)


=====================================
src/code/unidata.lisp
=====================================
@@ -513,11 +513,12 @@
 		    (read-vector lvec stm :endian-swap :network-order)
 		    (values split hvec mvec lvec))))
 	 (declare (ignorable #'read16 #'read32 #'read-ntrie))
-	 (with-open-file (,stm *unidata-path* :direction :input
-			       :element-type '(unsigned-byte 8))
-	   (unless (unidata-locate ,stm ,locn)
-	     (error (intl:gettext "No data in file.")))
-	   , at body)))))
+	 (let (#+nil(lisp::*enable-normalization* nil))
+	   (with-open-file (,stm *unidata-path* :direction :input
+						:element-type '(unsigned-byte 8))
+	     (unless (unidata-locate ,stm ,locn)
+	       (error (intl:gettext "No data in file.")))
+	     , at body))))))
 
 (defloader load-range (stm 0)
   (let* ((n (read32 stm))



View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/d7a1099dd6c0ed5540a46f1d5b60ebfe66886a82

-- 
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/d7a1099dd6c0ed5540a46f1d5b60ebfe66886a82
You're receiving this email because of your account on gitlab.common-lisp.net.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/cmucl-cvs/attachments/20221221/7f2c859c/attachment-0001.html>


More information about the cmucl-cvs mailing list