Raymond Toy pushed to branch master at cmucl / cmucl
Commits: dbdec3a5 by Raymond Toy at 2023-01-13T23:33:47+00:00 Address #139: Set filename encoding to :utf-8
- - - - - d004986e by Raymond Toy at 2023-01-13T23:33:49+00:00 Merge branch 'issue-139-set-filename-encoding-to-utf8' into 'master'
Address #139: Set filename encoding to :utf-8
See merge request cmucl/cmucl!109 - - - - -
4 changed files:
- src/code/extfmts.lisp - src/code/save.lisp - src/code/unix.lisp - tests/issues.lisp
Changes:
===================================== src/code/extfmts.lisp ===================================== @@ -370,8 +370,10 @@ #() '())))))
(defun load-external-format-aliases () + ;; Set filename encoding to NIL to bypass any encoding; it's not + ;; needed to open the aliases file. NIL means the pathname string + ;; is passed as is, where only the low 8 bits of each UTF-16 code + ;; unit are passed to the operating system. (let ((*package* (find-package "KEYWORD")) - (unix::*filename-encoding* :iso8859-1)) + (unix::*filename-encoding* nil)) (with-open-file (stm "ext-formats:aliases" :if-does-not-exist nil :external-format :iso8859-1) (when stm @@ -486,11 +488,16 @@ (and (consp name) (find-external-format name)) (and (with-standard-io-syntax ;; Use standard IO syntax so that changes by the user - ;; don't mess up compiling the external format. - (let ((*package* (find-package "STREAM")) - (lisp::*enable-package-locked-errors* nil) - (s (open (format nil "ext-formats:~(~A~).lisp" name) - :if-does-not-exist nil :external-format :iso8859-1))) + ;; don't mess up compiling the external format, but we + ;; don't need to print readably. Also, set filename + ;; encoding to NIL because we don't need any special + ;; encoding to open the format files. + (let* ((*print-readably* nil) + (unix::*filename-encoding* nil) + (*package* (find-package "STREAM")) + (lisp::*enable-package-locked-errors* nil) + (s (open (format nil "ext-formats:~(~A~).lisp" name) + :if-does-not-exist nil :external-format :iso8859-1))) (when s (null (nth-value 1 (ext:compile-from-stream s)))))) (gethash name *external-formats*))))
===================================== src/code/save.lisp ===================================== @@ -164,7 +164,35 @@ *default-external-format*)))) (values))
- +(defun decode-runtime-strings (locale file-locale) + ;; The C runtime can initialize the following strings from the + ;; command line or the environment. We need to decode these into + ;; the utf-16 strings that Lisp uses. + (setf lisp-command-line-list + (mapcar #'(lambda (s) + (stream:string-decode s locale)) + lisp-command-line-list)) + (setf lisp-environment-list + (mapcar #'(lambda (s) + (stream:string-decode s locale)) + lisp-environment-list)) + ;; This needs more work. *cmucl-lib* could be set from the envvar + ;; "CMUCLLIB" or from the "-lib" command-line option, and thus + ;; should use the LOCALE to decode the string. + (when *cmucl-lib* + (setf *cmucl-lib* + (stream:string-decode *cmucl-lib* file-locale))) + ;; This also needs more work since the core path could come from the + ;; "-core" command-line option and should thus use LOCALE to decode + ;; the string. It could also come from the "CMUCLCORE" envvar. + (setf *cmucl-core-path* + (stream:string-decode *cmucl-core-path* file-locale)) + ;; *unidata-path* defaults to a pathname object, but the user can + ;; specify a path, so we need to decode the string path if given. + (when (and *unidata-path* (stringp *unidata-path*)) + (setf *unidata-path* + (stream:string-decode *unidata-path* file-locale)))) + (defun save-lisp (core-file-name &key (purify t) (root-structures ()) @@ -278,12 +306,18 @@ ;; Load external format aliases now so we can use aliases to ;; specify the external format. (stream::load-external-format-aliases) - ;; Set the locale for lisp - (intl::setlocale) ;; Set up :locale format (set-up-locale-external-format) - ;; Set terminal encodings to :locale - (set-system-external-format :locale) + ;; Set terminal encodings to :locale and filename encoding to :utf-8. + ;; (This needs more work on Darwin.)
+ (set-system-external-format :locale :utf-8) + (decode-runtime-strings :locale :utf-8) + ;; Need to reinitialize the environment again because + ;; we've possibly changed the environment variables and + ;; pathnames. + (environment-init) + ;; Set the locale for lisp + (intl::setlocale) (ext::process-command-strings process-command-line) (setf *editor-lisp-p* nil) (macrolet ((find-switch (name)
===================================== src/code/unix.lisp ===================================== @@ -25,7 +25,12 @@ ;; it must be set to :iso8859-1 (or left as NIL), making files with ;; non-Latin-1 characters "mojibake", but otherwise they'll be inaccessible. ;; Must be set to NIL initially to enable building Lisp! -(defvar *filename-encoding* nil) +(defvar *filename-encoding* nil + "The encoding to use for converting a namestring to a string that can + be used by the operating system. It must be a valid + external-format name or NIL. NIL means the string is passed as is + to the operating system. The operating system will get the low 8 + bits of each UTF-16 code unit of the string.")
(eval-when (:compile-toplevel :load-toplevel :execute) (defmacro %name->file (string)
===================================== tests/issues.lisp ===================================== @@ -258,6 +258,13 @@ (assert-equal (map 'list #'char-code out-string) (map 'list #'char-code expected))))))
+(define-test issue.25c-setup + (:tag :issues) + ;; Get the external format before running the test issue.25c. See + ;; issue #161 + ;; (https://gitlab.common-lisp.net/cmucl/cmucl/-/issues/161). + (assert-true (stream::find-external-format :utf16-be))) + (define-test issue.25c (:tag :issues) ;; Modified test to verify that each octet read from run-program is @@ -682,10 +689,7 @@ ;; work and not return NIL. (assert-true (file-author ".")) (assert-true (file-author "bin/build.sh")) - (let ((unix::*filename-encoding* :utf-8)) - ;; Set filename encoding to utf-8 so that we can encode the - ;; filename properly. - (assert-true + (assert-true (file-author (merge-pathnames (concatenate 'string @@ -696,7 +700,7 @@ '(#\Hangul_Syllable_An #\Hangul_Syllable_Nyeong #\Hangul_Syllable_Ha #\Hangul_Syllable_Sib #\Hangul_Syllable_Ni #\Hangul_Syllable_Gga) ".txt") - *test-path*))))) + *test-path*))))
(define-test issue.139-default-external-format (:tag :issues)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6fc2e38e925ab9f3fcfb7e5...