Raymond Toy pushed to branch issue-139-filename-encoding-utf8 at cmucl / cmucl
Commits:
-
e7459829
by Raymond Toy at 2022-11-25T15:35:51+00:00
-
88843edc
by Raymond Toy at 2022-11-25T15:35:52+00:00
-
225940e4
by Raymond Toy at 2022-11-25T16:07:57+00:00
-
bea34994
by Raymond Toy at 2022-11-25T16:07:57+00:00
-
73b09b08
by Raymond Toy at 2022-11-25T10:14:09-08:00
8 changed files:
- src/code/save.lisp
- src/code/stream.lisp
- src/code/unix.lisp
- src/general-info/release-21e.md
- src/i18n/locale/cmucl-unix.pot
- src/i18n/locale/cmucl.pot
- src/lisp/os-common.c
- tests/issues.lisp
Changes:
| ... | ... | @@ -142,6 +142,30 @@ |
| 142 | 142 | (file c-call:c-string)
|
| 143 | 143 | (initial-function (alien:unsigned #.vm:word-bits)))
|
| 144 | 144 | |
| 145 | +(defun set-up-locale-external-format ()
|
|
| 146 | + "Add external format alias for :locale to the format specified by
|
|
| 147 | + the locale as set by setlocale(3C)."
|
|
| 148 | + (let ((codeset (unix::unix-get-locale-codeset)))
|
|
| 149 | + (cond ((zerop (length codeset))
|
|
| 150 | + ;; Codeset was the empty string, so just set :locale to
|
|
| 151 | + ;; alias to the default external format.
|
|
| 152 | + (setf (gethash :locale stream::*external-format-aliases*)
|
|
| 153 | + *default-external-format*))
|
|
| 154 | + (t
|
|
| 155 | + (let ((codeset-format (intern codeset "KEYWORD")))
|
|
| 156 | + ;; If we know the format, we can set the alias.
|
|
| 157 | + ;; Otherwise, print a warning and use :iso8859-1 as the
|
|
| 158 | + ;; alias.
|
|
| 159 | + (setf (gethash :locale stream::*external-format-aliases*)
|
|
| 160 | + (if (stream::find-external-format codeset-format nil)
|
|
| 161 | + codeset-format
|
|
| 162 | + (progn
|
|
| 163 | + (warn "Unsupported external format; using :iso8859-1 instead: ~S"
|
|
| 164 | + codeset-format)
|
|
| 165 | + :iso8859-1)))))))
|
|
| 166 | + (values))
|
|
| 167 | + |
|
| 168 | +
|
|
| 145 | 169 | (defun save-lisp (core-file-name &key
|
| 146 | 170 | (purify t)
|
| 147 | 171 | (root-structures ())
|
| ... | ... | @@ -252,8 +276,13 @@ |
| 252 | 276 | ;; Set the runtime locale
|
| 253 | 277 | (unless (zerop (unix::unix-setlocale))
|
| 254 | 278 | (warn "os_setlocale failed"))
|
| 279 | + ;; Load external format aliases now so we can aliases to
|
|
| 280 | + ;; specify the external format.
|
|
| 281 | + (stream::load-external-format-aliases)
|
|
| 255 | 282 | ;; Set the locale for lisp
|
| 256 | 283 | (intl::setlocale)
|
| 284 | + ;; Set up :locale format
|
|
| 285 | + (set-up-locale-external-format)
|
|
| 257 | 286 | (ext::process-command-strings process-command-line)
|
| 258 | 287 | (setf *editor-lisp-p* nil)
|
| 259 | 288 | (macrolet ((find-switch (name)
|
| ... | ... | @@ -290,13 +290,21 @@ |
| 290 | 290 | (stream-dispatch stream
|
| 291 | 291 | ;; simple-stream
|
| 292 | 292 | (stream::%stream-external-format stream)
|
| 293 | - ;; lisp-stream
|
|
| 294 | - (typecase stream
|
|
| 293 | + ;; lisp-stream. For unsupported streams, signal a type error.
|
|
| 294 | + (etypecase stream
|
|
| 295 | 295 | #+unicode
|
| 296 | 296 | (fd-stream (fd-stream-external-format stream))
|
| 297 | - (synonym-stream (stream-external-format
|
|
| 298 | - (symbol-value (synonym-stream-symbol stream))))
|
|
| 299 | - (t :default))
|
|
| 297 | + (broadcast-stream
|
|
| 298 | + ;; See http://www.lispworks.com/documentation/HyperSpec/Body/t_broadc.htm
|
|
| 299 | + (let ((components (broadcast-stream-streams stream)))
|
|
| 300 | + (if (null components)
|
|
| 301 | + :default
|
|
| 302 | + (stream-external-format (car (last components))))))
|
|
| 303 | + (synonym-stream
|
|
| 304 | + ;; Not defined by CLHS. What should happen if
|
|
| 305 | + ;; (synonym-stream-symbol stream) is unbound?
|
|
| 306 | + (stream-external-format
|
|
| 307 | + (symbol-value (synonym-stream-symbol stream)))))
|
|
| 300 | 308 | ;; fundamental-stream
|
| 301 | 309 | :default))
|
| 302 | 310 |
| ... | ... | @@ -2918,3 +2918,10 @@ |
| 2918 | 2918 | 256)))
|
| 2919 | 2919 | (when (zerop result)
|
| 2920 | 2920 | (cast buf c-call:c-string)))))
|
| 2921 | + |
|
| 2922 | +(defun unix-get-locale-codeset ()
|
|
| 2923 | + _N"Get the codeset from the locale"
|
|
| 2924 | + (cast (alien-funcall
|
|
| 2925 | + (extern-alien "os_get_locale_codeset"
|
|
| 2926 | + (function (* char))))
|
|
| 2927 | + c-string)) |
| ... | ... | @@ -59,7 +59,8 @@ public domain. |
| 59 | 59 | * ~~#132~~ Ansi test `RENAME-FILE.1` no fails
|
| 60 | 60 | * ~~#134~~ Handle the case of `(expt complex complex-rational)`
|
| 61 | 61 | * ~~#136~~ `ensure-directories-exist` should return the given pathspec
|
| 62 | - * #139 `*default-external-format*` defaults to `:utf-8`
|
|
| 62 | + * #139 `*default-external-format*` defaults to `:utf-8`; add alias for `:locale` external format
|
|
| 63 | + * ~~#140~~ External format for streams that are not `file-stream`'s
|
|
| 63 | 64 | * ~~#141~~ Disallow locales that are pathnames to a localedef file
|
| 64 | 65 | * ~~#142~~ `(random 0)` signals incorrect error
|
| 65 | 66 | * ~~#147~~ `stream-line-column` method missing for `fundamental-character-output-stream`
|
| ... | ... | @@ -1435,3 +1435,7 @@ msgid "" |
| 1435 | 1435 | " calling this so that the correct locale is returned."
|
| 1436 | 1436 | msgstr ""
|
| 1437 | 1437 | |
| 1438 | +#: src/code/unix.lisp
|
|
| 1439 | +msgid "Get the codeset from the locale"
|
|
| 1440 | +msgstr ""
|
|
| 1441 | + |
| ... | ... | @@ -6714,6 +6714,12 @@ msgid "" |
| 6714 | 6714 | "This is true if and only if the lisp was started with the -edit switch."
|
| 6715 | 6715 | msgstr ""
|
| 6716 | 6716 | |
| 6717 | +#: src/code/save.lisp
|
|
| 6718 | +msgid ""
|
|
| 6719 | +"Add external format alias for :locale to the format specified by\n"
|
|
| 6720 | +" the locale as set by setlocale(3C)."
|
|
| 6721 | +msgstr ""
|
|
| 6722 | + |
|
| 6717 | 6723 | #: src/code/save.lisp
|
| 6718 | 6724 | msgid ""
|
| 6719 | 6725 | "Saves a CMU Common Lisp core image in the file of the specified name. The\n"
|
| ... | ... | @@ -7,6 +7,7 @@ |
| 7 | 7 | |
| 8 | 8 | #include <assert.h>
|
| 9 | 9 | #include <errno.h>
|
| 10 | +#include <langinfo.h>
|
|
| 10 | 11 | #include <locale.h>
|
| 11 | 12 | #include <math.h>
|
| 12 | 13 | #include <netdb.h>
|
| ... | ... | @@ -796,3 +797,9 @@ os_get_lc_messages(char *buf, int len) |
| 796 | 797 | /* Return -1 if setlocale failed. */
|
| 797 | 798 | return locale ? 0 : -1;
|
| 798 | 799 | }
|
| 800 | + |
|
| 801 | +char *
|
|
| 802 | +os_get_locale_codeset()
|
|
| 803 | +{
|
|
| 804 | + return nl_langinfo(CODESET);
|
|
| 805 | +} |
| ... | ... | @@ -766,6 +766,57 @@ |
| 766 | 766 | (assert-equal (map 'list #'char-name string)
|
| 767 | 767 | (map 'list #'char-name (read-line s))))))
|
| 768 | 768 |
|
| 769 | +(define-test issue.139-locale-external-format
|
|
| 770 | + (:tag :issues)
|
|
| 771 | + ;; Just verify that :locale format exists
|
|
| 772 | + (assert-true (stream::find-external-format :locale nil)))
|
|
| 773 | + |
|
| 774 | +;;; Test stream-external-format for various types of streams.
|
|
| 775 | + |
|
| 776 | +(define-test issue.140.two-way-stream
|
|
| 777 | + (:tag :issues)
|
|
| 778 | + (with-open-file (in (merge-pathnames "issues.lisp" cmucl-test-runner::*load-path*)
|
|
| 779 | + :direction :input
|
|
| 780 | + :external-format :utf-8)
|
|
| 781 | + (with-open-file (out "/tmp/output.tst"
|
|
| 782 | + :direction :output
|
|
| 783 | + :external-format :utf-8
|
|
| 784 | + :if-exists :supersede)
|
|
| 785 | + (let ((two-way-stream (make-two-way-stream in out)))
|
|
| 786 | + (assert-error 'type-error
|
|
| 787 | + (stream-external-format two-way-stream))))))
|
|
| 788 | + |
|
| 789 | +;; Test synonym-stream returns the format of the underlying stream.
|
|
| 790 | +(define-test issue.140.synonym-stream
|
|
| 791 | + (:tag :issues)
|
|
| 792 | + (with-open-file (s (merge-pathnames "issues.lisp" cmucl-test-runner::*load-path*)
|
|
| 793 | + :direction :input
|
|
| 794 | + :external-format :iso8859-1)
|
|
| 795 | + (let ((syn (make-synonym-stream '*syn-stream*)))
|
|
| 796 | + (setf syn s)
|
|
| 797 | + (assert-equal :iso8859-1 (stream-external-format syn)))))
|
|
| 798 | + |
|
| 799 | +(define-test issue.140.broadcast-stream
|
|
| 800 | + (:tag :issues)
|
|
| 801 | + ;; Create 3 output streams. The exact external formats aren't
|
|
| 802 | + ;; really important here as long as they're different for each file
|
|
| 803 | + ;; so we can tell if we got the right answer.
|
|
| 804 | + (with-open-file (s1 "/tmp/broad-1"
|
|
| 805 | + :direction :output
|
|
| 806 | + :if-exists :supersede
|
|
| 807 | + :external-format :latin1)
|
|
| 808 | + (with-open-file (s2 "/tmp/broad-2"
|
|
| 809 | + :direction :output
|
|
| 810 | + :if-exists :supersede
|
|
| 811 | + :external-format :utf-8)
|
|
| 812 | + (with-open-file (s3 "/tmp/broad-3"
|
|
| 813 | + :direction :output
|
|
| 814 | + :if-exists :supersede
|
|
| 815 | + :external-format :utf-16)
|
|
| 816 | + ;; The format must be the value from the last stream.
|
|
| 817 | + (assert-equal :utf-16
|
|
| 818 | + (stream-external-format
|
|
| 819 | + (make-broadcast-stream s1 s2 s3)))))))
|
|
| 769 | 820 | |
| 770 | 821 | (define-test issue.150
|
| 771 | 822 | (:tag :issues)
|