Raymond Toy pushed to branch issue-158-darwin-pathnames at cmucl / cmucl
Commits: be398f1a by Raymond Toy at 2022-12-21T10:17:05-08:00 Actually enable Darwin path normalization on Darwin.
Previously, we had this enabled on Linux for testing. Now switch over to Darwin.
Rename `*enable-normalization*` to `*enable-darwin-path-normalization*` to make it clearer. Add a docstring too.
In save.lisp, only set it on darwin since it's not relevant to any other system.
Fix up some compiler notes about unused variables in `decompose-hangul-syllable`.
- - - - - 856f9a9f by Raymond Toy at 2022-12-21T10:21:08-08:00 Oops. Forgot to rename `*enable-normalization*` in save.lisp
- - - - -
4 changed files:
- src/code/pathname.lisp - src/code/save.lisp - src/code/unicode.lisp - src/code/unidata.lisp
Changes:
===================================== src/code/pathname.lisp ===================================== @@ -252,7 +252,12 @@ ;;; This constructor is used to make an instance of the correct type ;;; from parsed arguments.
-(defvar *enable-normalization* nil) +(defvar *enable-darwin-path-normalization* nil + "When non-NIL, pathnames are on Darwin are normalized when created. + Otherwise, the pathnames are unchanged. + + This must be NIL during bootstrapping because Unicode is not yet + available.")
(defun normalize-name (piece) ;; Normalize Darwin pathnames by converting Hangul @@ -261,7 +266,7 @@ ;; characters. (typecase piece (string - (if *enable-normalization* + (if *enable-darwin-path-normalization* (decompose (unicode::decompose-hangul piece) :compatibility nil :darwinp t) @@ -289,16 +294,29 @@ (upcasify name) (upcasify type) (upcasify version))) - #-(not nil) + #-darwin (%make-pathname host device directory name type version) - #+(not nil) - (%make-pathname host device - (when directory - (list* (car directory) - (mapcar #'normalize-name (cdr directory)))) - (normalize-name name) - (normalize-name type) - version)))) + #+darwin + (flet ((normalize-name (piece) + ;; Normalize Darwin pathnames by converting Hangul + ;; syllables to conjoining jamo, and converting the + ;; string to NFD form, but skipping over a range of + ;; characters. + (typecase piece + (string + (if *enable-darwin-path-normalization* + (decompose (unicode::decompose-hangul piece) + :compatibility nil + :darwinp t) + piece)) + (t + ;; What should we do about lisp::pattern objects? + piece)))) + (%make-pathname host device + (mapcar #'normalize-name (cdr directory)) + (normalize-name name) + (normalize-name type) + version))))
;;; *LOGICAL-HOSTS* --internal. ;;;
===================================== src/code/save.lisp ===================================== @@ -284,16 +284,17 @@ (set-up-locale-external-format) ;; Set terminal encodings to :locale (set-system-external-format :locale) - ;; Get some unicode stuff needed for decomposing strings. - ;; This is needed on Darwin to normalize pathname - ;; objects, which needs this information. If we don't, - ;; we'll load the information at runtime when creating - ;; the path to "unidata.dat", which then calls decompose - ;; again, and so on. + #+darwin (progn + ;; Get some unicode stuff needed for decomposing strings. + ;; This is needed on Darwin to normalize pathname + ;; objects, which needs this information. If we don't, + ;; we'll load the information at runtime when creating + ;; the path to "unidata.dat", which then calls decompose + ;; again, and so on. (lisp::load-decomp) (lisp::load-combining) - (setf *enable-normalization* t)) + (setf *enable-darwin-path-normalization* t)) (ext::process-command-strings process-command-line) (setf *editor-lisp-p* nil) (macrolet ((find-switch (name)
===================================== src/code/unicode.lisp ===================================== @@ -527,18 +527,19 @@ (l-base #x1100) (v-base #x1161) (t-base #x11a7) - (s-count 11172) - (l-count 19) (v-count 21) (t-count 28) (n-count (* v-count t-count))) ;; Step 1: Compute index of the syllable S (let ((s-index (- cp s-base))) - ;; Step 2: If s is in the range 0 <= s <= s-count, the compute the components + ;; Step 2: If s is in the range 0 <= s <= s-count, the compute + ;; the components. (let ((l (+ l-base (truncate s-index n-count))) (v (+ v-base (truncate (mod s-index n-count) t-count))) (tt (+ t-base (mod s-index t-count)))) - ;; Step 3: If tt = t-base, then there is no trailing character so replace s by the sequence <l,v>. Otherwise there is a trailing character, so replace s by the sequence <l,v,tt> + ;; Step 3: If tt = t-base, then there is no trailing character + ;; so replace s by the sequence <l,v>. Otherwise there is a + ;; trailing character, so replace s by the sequence <l,v,tt>. (princ (code-char l) stream) (princ (code-char v) stream) (unless (= tt t-base)
===================================== src/code/unidata.lisp ===================================== @@ -513,12 +513,11 @@ (read-vector lvec stm :endian-swap :network-order) (values split hvec mvec lvec)))) (declare (ignorable #'read16 #'read32 #'read-ntrie)) - (let (#+nil(lisp::*enable-normalization* nil)) - (with-open-file (,stm *unidata-path* :direction :input - :element-type '(unsigned-byte 8)) - (unless (unidata-locate ,stm ,locn) - (error (intl:gettext "No data in file."))) - ,@body)))))) + (with-open-file (,stm *unidata-path* :direction :input + :element-type '(unsigned-byte 8)) + (unless (unidata-locate ,stm ,locn) + (error (intl:gettext "No data in file."))) + ,@body)))))
(defloader load-range (stm 0) (let* ((n (read32 stm))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d7a1099dd6c0ed5540a46f1...