Raymond Toy pushed to branch issue-139-add-alias-local-external-format at cmucl / cmucl
Commits: 88f6852f by Raymond Toy at 2022-11-01T12:04:55-07:00 Change :iso-8859-1 to :iso8859-1 in find-encoding
While there's an alias for `:iso-8859-1`, it's safer to use `:iso8859-1` which is builtin. Using `:iso-8859-1` requires the alias database to be loaded, which isn't (currently) guaranteed when `find-encoding` is called. Thus use the builtin name instead. Besides, `:iso8859-1` is used in other places in "intl.lisp".
(This is hard to test, but I noticed it when running ``` LANG=ko_KR.utf8 lisp ``` on the branch `issue-139-add-alias-local-external-format`.)
- - - - - 35b1282d by Raymond Toy at 2022-11-01T12:08:51-07:00 Merge branch 'master' into issue-139-add-alias-local-external-format
- - - - - 7b17a82e by Raymond Toy at 2022-11-01T12:38:48-07:00 Add interface unix-get-locale-codeset
* src/lisp/os-common.c * Add the function `os_get_locale_codeset` to get the codeset for our locale.
* src/code/unix.lisp * Add alien interface to `os_get_locale_codeset`, named `unix-get-locale-codeset`.
* src/code/save.lisp * Use `unix-get-locale-codeset` to figure out how to set up an alias for the external format named `:locale`. Set this up in the initial function when we save lisp.
- - - - -
4 changed files:
- src/code/intl.lisp - src/code/save.lisp - src/code/unix.lisp - src/lisp/os-common.c
Changes:
===================================== src/code/intl.lisp ===================================== @@ -105,7 +105,7 @@
(defun find-encoding (domain) (when (null (domain-entry-encoding domain)) - (setf (domain-entry-encoding domain) :iso-8859-1) + (setf (domain-entry-encoding domain) :iso8859-1) ;; Domain lookup can call the compiler, so set the locale to "C" ;; so things work. (let* ((*locale* "C")
===================================== src/code/save.lisp ===================================== @@ -145,53 +145,25 @@ (defun set-up-locale-external-format () "Add external format alias for :locale to the format specified by the envvar LANG and friends if available." - ;; Find the envvar that will tell us what encoding to use. - ;; - ;; See https://pubs.opengroup.org/onlinepubs/7908799/xbd/envvar.html - ;; - (let* ((lang (or (unix:unix-getenv "LC_ALL") - (unix:unix-getenv "LC_MESSAGES") - (unix:unix-getenv "LANG"))) - (length (length lang))) - ;; If LANG isn't set, just set :locale to alias to the - ;; default-external-format. - (unless lang - (setf (gethash :locale stream::*external-format-aliases*) *default-external-format*) - (return-from set-up-locale-external-format (values))) - ;; Extract the external format from the envvar and set up the - ;; :locale alias. - (let ((new-alias - (cond - ((or (string-equal "C" lang :end2 (min 1 length)) - (string-equal "POSIX" lang :end2 (min 5 length))) - ;; If the lang is "C" or "POSIX", ignoring anything after - ;; that, default to :iso8859-1. - :iso8859-1) - ((string-equal "/" lang :end2 (min 1 length)) - ;; Also, we don't handle the case where the locale starts - ;; with a slash which means a pathname to a file created by - ;; the localedef utility. So use our defaults for that case - ;; as well. - :iso8859-1) - (t - ;; Simple parsing of LANG. We assume it looks like - ;; "language[_territory][.codeset]". We're only interested - ;; in the codeset, if given. Some LC_ vars also have an - ;; optional @modifier after the codeset; we ignore that too. - (let ((dot (position #. lang)) - (at (or (position #@ lang) nil))) - (when dot - (let* ((codeset (subseq lang (1+ dot) at)) - (format (intern codeset "KEYWORD"))) - (cond ((stream::find-external-format format nil) - format) - (t - (warn "Unknown or unsupported external format: ~S" - codeset) - *default-external-format*))))))))) - (assert new-alias) - (setf (gethash :locale stream::*external-format-aliases*) new-alias)) - (values))) + (let ((codeset (unix::unix-get-locale-codeset))) + (cond ((zerop (length codeset)) + ;; Codeset was the empty string, so just set :locale to + ;; alias to the default external format. + (setf (gethash :locale stream::*external-format-aliases*) + *default-external-format*)) + (t + (let ((codeset-format (intern codeset "KEYWORD"))) + ;; If we know the format, we can set the alias. + ;; Otherwise, print a warning and use :iso8859-1 as the + ;; alias. + (setf (gethash :locale stream::*external-format-aliases*) + (if (stream::find-external-format codeset-format nil) + codeset-format + (progn + (warn "Unsupported external format; using :iso8859-1 instead: ~S" + codeset-format) + :iso8859-1))))))) + (values))
(defun save-lisp (core-file-name &key @@ -301,6 +273,7 @@ (reinit) (environment-init) (dolist (f *after-save-initializations*) (funcall f)) + (stream::load-external-format-aliases) (intl::setlocale) (ext::process-command-strings process-command-line) (setf *editor-lisp-p* nil)
===================================== src/code/unix.lisp ===================================== @@ -2893,3 +2893,13 @@ of the child in the parent if it works, or NIL and an error number if it doesn't work." (int-syscall ("fork"))) + +(defun unix-get-locale-codeset () + _N"Get the codeset from the locale" + (with-alien ((codeset (array c-call:char 512))) + (alien-funcall + (extern-alien "os_get_locale_codeset" + (function void (* char) int)) + (cast codeset (* c-call:char)) + 512) + (cast codeset c-string)))
===================================== src/lisp/os-common.c ===================================== @@ -7,6 +7,8 @@
#include <assert.h> #include <errno.h> +#include <langinfo.h> +#include <locale.h> #include <math.h> #include <netdb.h> #include <pwd.h> @@ -773,3 +775,15 @@ exit:
return result; } + +void +os_get_locale_codeset(char* codeset, int len) +{ + char *code; + + setlocale(LC_ALL, ""); + + code = nl_langinfo(CODESET); + + strncpy(codeset, code, len); +}
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/69f2a9909142a2e6d5b91db...