Raymond Toy pushed to branch issue-139-set-filename-encoding-to-utf8 at cmucl / cmucl
Commits:
77325992 by Raymond Toy at 2023-01-10T15:47:44-08:00
Undo unneeded indentation.
`#+executable` was indented one space, but that's not relevant to this
change, so undo it.
- - - - -
20bedbc8 by Raymond Toy at 2023-01-10T15:49:21-08:00
Actually set encoding to NIL, per comment.
Not sure what happened here.
- - - - -
2 changed files:
- src/code/extfmts.lisp
- src/code/save.lisp
Changes:
=====================================
src/code/extfmts.lisp
=====================================
@@ -493,7 +493,7 @@
;; encoding to NIL because we don't need any special
;; encoding to open the format files.
(let* ((*print-readably* nil)
- ;;(unix::*filename-encoding* nil)
+ (unix::*filename-encoding* nil)
(*package* (find-package "STREAM"))
(lisp::*enable-package-locked-errors* nil)
(s (open (format nil "ext-formats:~(~A~).lisp" name)
=====================================
src/code/save.lisp
=====================================
@@ -202,7 +202,7 @@
(site-init "library:site-init")
(print-herald t)
(process-command-line t)
- #+:executable
+ #+:executable
(executable nil)
(batch-mode nil)
(quiet nil))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/5aa5c037c5086f89ca5025…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/5aa5c037c5086f89ca5025…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-139-set-filename-encoding-to-utf8 at cmucl / cmucl
Commits:
5aa5c037 by Raymond Toy at 2022-12-21T12:58:49-08:00
Add docstring for *filename-encoding*
- - - - -
1 changed file:
- src/code/unix.lisp
Changes:
=====================================
src/code/unix.lisp
=====================================
@@ -25,7 +25,12 @@
;; it must be set to :iso8859-1 (or left as NIL), making files with
;; non-Latin-1 characters "mojibake", but otherwise they'll be inaccessible.
;; Must be set to NIL initially to enable building Lisp!
-(defvar *filename-encoding* nil)
+(defvar *filename-encoding* nil
+ "The encoding to use for converting a namestring to a string that can
+ be used by the operations system. It must be a valid
+ external-format name or NIL. NIL means the string is passed as is
+ to the operating system. The operating system will get the low 8
+ bits of each UTF-16 code unit of the string.")
(eval-when (:compile-toplevel :load-toplevel :execute)
(defmacro %name->file (string)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/5aa5c037c5086f89ca50258…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/5aa5c037c5086f89ca50258…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-158-darwin-pathnames at cmucl / cmucl
Commits:
08496138 by Raymond Toy at 2022-12-21T11:27:36-08:00
Clean up implementation
Remove the function `normalize-name` since it's in an flet now.
Make `*enable-darwin-path-normalization*` only defined on darwin,
which is the only OS where it's needed.
- - - - -
1 changed file:
- src/code/pathname.lisp
Changes:
=====================================
src/code/pathname.lisp
=====================================
@@ -252,6 +252,7 @@
;;; This constructor is used to make an instance of the correct type
;;; from parsed arguments.
+#+darwin
(defvar *enable-darwin-path-normalization* nil
"When non-NIL, pathnames are on Darwin are normalized when created.
Otherwise, the pathnames are unchanged.
@@ -259,22 +260,6 @@
This must be NIL during bootstrapping because Unicode is not yet
available.")
-(defun normalize-name (piece)
- ;; Normalize Darwin pathnames by converting Hangul
- ;; syllables to conjoining jamo, and converting the
- ;; string to NFD form, but skipping over a range of
- ;; characters.
- (typecase piece
- (string
- (if *enable-darwin-path-normalization*
- (decompose (unicode::decompose-hangul piece)
- :compatibility nil
- :darwinp t)
- piece))
- (t
- ;; What should we do about lisp::pattern objects?
- piece)))
-
(defun %make-pathname-object (host device directory name type version)
(if (typep host 'logical-host)
(flet ((upcasify (thing)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/08496138d12cbf3f2b839d4…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/08496138d12cbf3f2b839d4…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-158-darwin-pathnames at cmucl / cmucl
Commits:
361d18f9 by Raymond Toy at 2022-12-21T11:01:14-08:00
Fix typo normalizing directories.
We had written `(cdr directory)` but we really need just `directory`
so that we keep the `:absolute` or `:relative` part of the directory.
- - - - -
1 changed file:
- src/code/pathname.lisp
Changes:
=====================================
src/code/pathname.lisp
=====================================
@@ -310,10 +310,11 @@
:darwinp t)
piece))
(t
- ;; What should we do about lisp::pattern objects?
+ ;; What should we do about lisp::pattern objects
+ ;; that occur in the name component?
piece))))
(%make-pathname host device
- (mapcar #'normalize-name (cdr directory))
+ (mapcar #'normalize-name directory)
(normalize-name name)
(normalize-name type)
version))))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/361d18f9ed7f243bbc6ab88…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/361d18f9ed7f243bbc6ab88…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-158-darwin-pathnames at cmucl / cmucl
Commits:
be398f1a by Raymond Toy at 2022-12-21T10:17:05-08:00
Actually enable Darwin path normalization on Darwin.
Previously, we had this enabled on Linux for testing. Now switch over
to Darwin.
Rename `*enable-normalization*` to
`*enable-darwin-path-normalization*` to make it clearer. Add a
docstring too.
In save.lisp, only set it on darwin since it's not relevant to any
other system.
Fix up some compiler notes about unused variables in
`decompose-hangul-syllable`.
- - - - -
856f9a9f by Raymond Toy at 2022-12-21T10:21:08-08:00
Oops. Forgot to rename `*enable-normalization*` in save.lisp
- - - - -
4 changed files:
- src/code/pathname.lisp
- src/code/save.lisp
- src/code/unicode.lisp
- src/code/unidata.lisp
Changes:
=====================================
src/code/pathname.lisp
=====================================
@@ -252,7 +252,12 @@
;;; This constructor is used to make an instance of the correct type
;;; from parsed arguments.
-(defvar *enable-normalization* nil)
+(defvar *enable-darwin-path-normalization* nil
+ "When non-NIL, pathnames are on Darwin are normalized when created.
+ Otherwise, the pathnames are unchanged.
+
+ This must be NIL during bootstrapping because Unicode is not yet
+ available.")
(defun normalize-name (piece)
;; Normalize Darwin pathnames by converting Hangul
@@ -261,7 +266,7 @@
;; characters.
(typecase piece
(string
- (if *enable-normalization*
+ (if *enable-darwin-path-normalization*
(decompose (unicode::decompose-hangul piece)
:compatibility nil
:darwinp t)
@@ -289,16 +294,29 @@
(upcasify name)
(upcasify type)
(upcasify version)))
- #-(not nil)
+ #-darwin
(%make-pathname host device directory name type version)
- #+(not nil)
- (%make-pathname host device
- (when directory
- (list* (car directory)
- (mapcar #'normalize-name (cdr directory))))
- (normalize-name name)
- (normalize-name type)
- version))))
+ #+darwin
+ (flet ((normalize-name (piece)
+ ;; Normalize Darwin pathnames by converting Hangul
+ ;; syllables to conjoining jamo, and converting the
+ ;; string to NFD form, but skipping over a range of
+ ;; characters.
+ (typecase piece
+ (string
+ (if *enable-darwin-path-normalization*
+ (decompose (unicode::decompose-hangul piece)
+ :compatibility nil
+ :darwinp t)
+ piece))
+ (t
+ ;; What should we do about lisp::pattern objects?
+ piece))))
+ (%make-pathname host device
+ (mapcar #'normalize-name (cdr directory))
+ (normalize-name name)
+ (normalize-name type)
+ version))))
;;; *LOGICAL-HOSTS* --internal.
;;;
=====================================
src/code/save.lisp
=====================================
@@ -284,16 +284,17 @@
(set-up-locale-external-format)
;; Set terminal encodings to :locale
(set-system-external-format :locale)
- ;; Get some unicode stuff needed for decomposing strings.
- ;; This is needed on Darwin to normalize pathname
- ;; objects, which needs this information. If we don't,
- ;; we'll load the information at runtime when creating
- ;; the path to "unidata.dat", which then calls decompose
- ;; again, and so on.
+ #+darwin
(progn
+ ;; Get some unicode stuff needed for decomposing strings.
+ ;; This is needed on Darwin to normalize pathname
+ ;; objects, which needs this information. If we don't,
+ ;; we'll load the information at runtime when creating
+ ;; the path to "unidata.dat", which then calls decompose
+ ;; again, and so on.
(lisp::load-decomp)
(lisp::load-combining)
- (setf *enable-normalization* t))
+ (setf *enable-darwin-path-normalization* t))
(ext::process-command-strings process-command-line)
(setf *editor-lisp-p* nil)
(macrolet ((find-switch (name)
=====================================
src/code/unicode.lisp
=====================================
@@ -527,18 +527,19 @@
(l-base #x1100)
(v-base #x1161)
(t-base #x11a7)
- (s-count 11172)
- (l-count 19)
(v-count 21)
(t-count 28)
(n-count (* v-count t-count)))
;; Step 1: Compute index of the syllable S
(let ((s-index (- cp s-base)))
- ;; Step 2: If s is in the range 0 <= s <= s-count, the compute the components
+ ;; Step 2: If s is in the range 0 <= s <= s-count, the compute
+ ;; the components.
(let ((l (+ l-base (truncate s-index n-count)))
(v (+ v-base (truncate (mod s-index n-count) t-count)))
(tt (+ t-base (mod s-index t-count))))
- ;; Step 3: If tt = t-base, then there is no trailing character so replace s by the sequence <l,v>. Otherwise there is a trailing character, so replace s by the sequence <l,v,tt>
+ ;; Step 3: If tt = t-base, then there is no trailing character
+ ;; so replace s by the sequence <l,v>. Otherwise there is a
+ ;; trailing character, so replace s by the sequence <l,v,tt>.
(princ (code-char l) stream)
(princ (code-char v) stream)
(unless (= tt t-base)
=====================================
src/code/unidata.lisp
=====================================
@@ -513,12 +513,11 @@
(read-vector lvec stm :endian-swap :network-order)
(values split hvec mvec lvec))))
(declare (ignorable #'read16 #'read32 #'read-ntrie))
- (let (#+nil(lisp::*enable-normalization* nil))
- (with-open-file (,stm *unidata-path* :direction :input
- :element-type '(unsigned-byte 8))
- (unless (unidata-locate ,stm ,locn)
- (error (intl:gettext "No data in file.")))
- ,@body))))))
+ (with-open-file (,stm *unidata-path* :direction :input
+ :element-type '(unsigned-byte 8))
+ (unless (unidata-locate ,stm ,locn)
+ (error (intl:gettext "No data in file.")))
+ ,@body)))))
(defloader load-range (stm 0)
(let* ((n (read32 stm))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d7a1099dd6c0ed5540a46f…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d7a1099dd6c0ed5540a46f…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-158-darwin-pathnames at cmucl / cmucl
Commits:
d7a1099d by Raymond Toy at 2022-12-21T09:35:37-08:00
Fix bootstrap issues in pathname normalization for Darwin
We create a pathname in `filesys-init` during loading of the
kernel.core. Unicode isn't ready at this time, so we can't do
pathname normalization yet.
Add `*enable-normalization*` to control this. Defaults to `NIL` and
gets set to `T` in the initial-function.
This also requires that we load up the Unicode decomp and combining
tables before setting `*enable-normalization*` to `T`.
For testing, this is enabled on Linux where I do my development.
We'll have to reorder this when this is working.
- - - - -
4 changed files:
- src/code/lispinit.lisp
- src/code/pathname.lisp
- src/code/save.lisp
- src/code/unidata.lisp
Changes:
=====================================
src/code/lispinit.lisp
=====================================
@@ -308,7 +308,8 @@
;;; in Unwind-Protects will get executed.
(declaim (special *lisp-initialization-functions*
- *load-time-values*))
+ *load-time-values*
+ *enable-normalization*))
(eval-when (compile)
(defmacro print-and-call (name)
@@ -344,6 +345,7 @@
(setf *type-system-initialized* nil)
(setf *break-on-signals* nil)
(setf unix::*filename-encoding* nil)
+ (setf *enable-normalization* nil)
#+gengc (setf conditions::*handler-clusters* nil)
(setq intl::*default-domain* "cmucl")
(setq intl::*locale* "C")
=====================================
src/code/pathname.lisp
=====================================
@@ -252,6 +252,24 @@
;;; This constructor is used to make an instance of the correct type
;;; from parsed arguments.
+(defvar *enable-normalization* nil)
+
+(defun normalize-name (piece)
+ ;; Normalize Darwin pathnames by converting Hangul
+ ;; syllables to conjoining jamo, and converting the
+ ;; string to NFD form, but skipping over a range of
+ ;; characters.
+ (typecase piece
+ (string
+ (if *enable-normalization*
+ (decompose (unicode::decompose-hangul piece)
+ :compatibility nil
+ :darwinp t)
+ piece))
+ (t
+ ;; What should we do about lisp::pattern objects?
+ piece)))
+
(defun %make-pathname-object (host device directory name type version)
(if (typep host 'logical-host)
(flet ((upcasify (thing)
@@ -271,24 +289,16 @@
(upcasify name)
(upcasify type)
(upcasify version)))
- #-darwin
+ #-(not nil)
(%make-pathname host device directory name type version)
- #+darwin
- (flet ((normalize-name (string)
- ;; Normalize Darwin pathnames by converting Hangul
- ;; syllables to conjoining jamo, and converting the
- ;; string to NFD form, but skipping over a range of
- ;; characters.
- (decompose (with-output-to-string (s)
- (unicode::decompose-hangul string s))
- :compatibility nil
- :darwinp t)))
- (%make-pathname host device
- (list (car directory)
- (mapcar #'normalize-name (cdr directory)))
- (normalize-name name)
- (normalize-name type)
- version))))
+ #+(not nil)
+ (%make-pathname host device
+ (when directory
+ (list* (car directory)
+ (mapcar #'normalize-name (cdr directory))))
+ (normalize-name name)
+ (normalize-name type)
+ version))))
;;; *LOGICAL-HOSTS* --internal.
;;;
=====================================
src/code/save.lisp
=====================================
@@ -284,6 +284,16 @@
(set-up-locale-external-format)
;; Set terminal encodings to :locale
(set-system-external-format :locale)
+ ;; Get some unicode stuff needed for decomposing strings.
+ ;; This is needed on Darwin to normalize pathname
+ ;; objects, which needs this information. If we don't,
+ ;; we'll load the information at runtime when creating
+ ;; the path to "unidata.dat", which then calls decompose
+ ;; again, and so on.
+ (progn
+ (lisp::load-decomp)
+ (lisp::load-combining)
+ (setf *enable-normalization* t))
(ext::process-command-strings process-command-line)
(setf *editor-lisp-p* nil)
(macrolet ((find-switch (name)
=====================================
src/code/unidata.lisp
=====================================
@@ -513,11 +513,12 @@
(read-vector lvec stm :endian-swap :network-order)
(values split hvec mvec lvec))))
(declare (ignorable #'read16 #'read32 #'read-ntrie))
- (with-open-file (,stm *unidata-path* :direction :input
- :element-type '(unsigned-byte 8))
- (unless (unidata-locate ,stm ,locn)
- (error (intl:gettext "No data in file.")))
- ,@body)))))
+ (let (#+nil(lisp::*enable-normalization* nil))
+ (with-open-file (,stm *unidata-path* :direction :input
+ :element-type '(unsigned-byte 8))
+ (unless (unidata-locate ,stm ,locn)
+ (error (intl:gettext "No data in file.")))
+ ,@body))))))
(defloader load-range (stm 0)
(let* ((n (read32 stm))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/d7a1099dd6c0ed5540a46f1…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/d7a1099dd6c0ed5540a46f1…
You're receiving this email because of your account on gitlab.common-lisp.net.