Raymond Toy pushed to branch issue-141-locale at cmucl / cmucl
Commits: e8a0cc6c by Raymond Toy at 2022-10-30T15:03:27+00:00 Fix #147: Add method for stream-line-column
- - - - - 0dad5a1a by Raymond Toy at 2022-10-30T15:03:28+00:00 Merge branch 'issue-147-stream-line-column-impl' into 'master'
Fix #147: Add method for stream-line-column
Closes #147
See merge request cmucl/cmucl!104 - - - - - 1300830b by Raymond Toy at 2022-10-31T17:12:48+00:00 Address #139: *default-external-format* is :utf-8
- - - - - 649a4f1e by Raymond Toy at 2022-10-31T17:12:49+00:00 Merge branch 'issue-139-default-external-format-utf8' into 'master'
Address #139: *default-external-format* is :utf-8
See merge request cmucl/cmucl!103 - - - - - 88f6852f by Raymond Toy at 2022-11-01T12:04:55-07:00 Change :iso-8859-1 to :iso8859-1 in find-encoding
While there's an alias for `:iso-8859-1`, it's safer to use `:iso8859-1` which is builtin. Using `:iso-8859-1` requires the alias database to be loaded, which isn't (currently) guaranteed when `find-encoding` is called. Thus use the builtin name instead. Besides, `:iso8859-1` is used in other places in "intl.lisp".
(This is hard to test, but I noticed it when running ``` LANG=ko_KR.utf8 lisp ``` on the branch `issue-139-add-alias-local-external-format`.)
- - - - - d5f1aa5e by Raymond Toy at 2022-11-01T20:35:49+00:00 Update release-21e.md with closed issues. - - - - - 402c0c01 by Raymond Toy at 2022-11-02T01:00:20+00:00 Fix #150: add aliases cp949 euckr
- - - - - d825aa54 by Raymond Toy at 2022-11-02T01:00:20+00:00 Merge branch 'issue-150-add-aliases-cp949-euckr' into 'master'
Fix #150: add aliases cp949 euckr
Closes #150
See merge request cmucl/cmucl!106 - - - - - 33c760fa by Raymond Toy at 2022-11-03T04:47:09+00:00 Fix #149: Call setlocale(3C) on startup
- - - - - 317a33f8 by Raymond Toy at 2022-11-03T04:47:10+00:00 Merge branch 'issue-149-add-setlocale' into 'master'
Fix #149: Call setlocale(3C) on startup
Closes #149
See merge request cmucl/cmucl!105 - - - - - 390f8f3f by Raymond Toy at 2022-11-04T18:14:01-07:00 Update release notes
- - - - - 2c5282bf by Raymond Toy at 2022-11-04T18:48:48-07:00 Merge branch 'master' into issue-141-locale
- - - - - 1dbc1061 by Raymond Toy at 2022-11-04T19:13:13-07:00 Implement unix-getlocale and use it
* lisp/os-common.c * Implement os_getlocale to get the current locale via setlocale(3C) * code/unix.lisp * Define function unix-getlocale to call os_getlocale * code/intl.lisp * Use unix-getlocale to get the locale instead of getting the different environment variables. * i18n/locale/cmucl-unix.pot * Update because of the new docstring
- - - - -
12 changed files:
- src/code/extfmts.lisp - src/code/intl.lisp - src/code/save.lisp - src/code/unix.lisp - src/general-info/release-21e.md - src/i18n/locale/cmucl-unix.pot - src/lisp/os-common.c - src/pcl/gray-streams.lisp - src/pcl/simple-streams/external-formats/aliases - + tests/.gitignore - tests/issues.lisp - + tests/utf8.txt
Changes:
===================================== src/code/extfmts.lisp ===================================== @@ -22,7 +22,7 @@ describe-external-format))
(defvar *default-external-format* - :iso8859-1 + :utf-8 "The default external format to use if no other external format is specified")
===================================== src/code/intl.lisp ===================================== @@ -105,7 +105,7 @@
(defun find-encoding (domain) (when (null (domain-entry-encoding domain)) - (setf (domain-entry-encoding domain) :iso-8859-1) + (setf (domain-entry-encoding domain) :iso8859-1) ;; Domain lookup can call the compiler, so set the locale to "C" ;; so things work. (let* ((*locale* "C") @@ -519,18 +519,8 @@ (if (equal val "") nil val)))
(defun setlocale (&optional locale) - (let ((env-locale (or locale - (getenv "LANGUAGE") - (getenv "LC_ALL") - (getenv "LC_MESSAGES") - (getenv "LANG")))) - (cond - ((and (plusp (length env-locale)) - (char-equal #/ (aref env-locale 0))) - (warn "Locale not changed due to unsupported locale: ~S" env-locale)) - (t - (setf *locale* (or env-locale - *locale*)))))) + (setf *locale* (or (unix::unix-getlocale) + *locale*)))
(defmacro textdomain (domain) `(eval-when (:compile-toplevel :execute)
===================================== src/code/save.lisp ===================================== @@ -249,6 +249,10 @@ (reinit) (environment-init) (dolist (f *after-save-initializations*) (funcall f)) + ;; Set the runtime locale + (unless (zerop (unix::unix-setlocale)) + (warn "os_setlocale failed")) + ;; Set the locale for lisp (intl::setlocale) (ext::process-command-strings process-command-line) (setf *editor-lisp-p* nil)
===================================== src/code/unix.lisp ===================================== @@ -2893,3 +2893,25 @@ of the child in the parent if it works, or NIL and an error number if it doesn't work." (int-syscall ("fork"))) + +(defun unix-setlocale () + _N"Call setlocale(3c) with fixed args. Returns 0 on success." + (alien:alien-funcall + (alien:extern-alien "os_setlocale" + (function c-call:int)))) + +(defun unix-getlocale () + _N"Get the current locale. If we can't, return NIL. A call to + UNIX-SETLOCALE must have been done previously before calling this so + that the correct locale is returned." + (with-alien ((buf (array c-call:char 256))) + (let ((result + (alien-funcall + (extern-alien "os_getlocale" + (function c-call:int + (* c-call:char) + c-call:int)) + (cast buf (* c-call:char)) + 256))) + (when (zerop result) + (cast buf c-call:c-string)))))
===================================== src/general-info/release-21e.md ===================================== @@ -22,6 +22,7 @@ public domain. * Feature enhancements * Changes * Update to ASDF 3.3.6 + * The default external format is `:utf-8` instead of `:iso8859-1` * ANSI compliance fixes: * Bug fixes: * ~~#97~~ Fixes stepping through the source forms in the debugger. This has been broken for quite some time, but it works now. @@ -50,13 +51,19 @@ public domain. * ~~#113~~ REQUIRE on contribs can pull in the wrong things via ASDF.. * ~~#121~~ Wrong column index in FILL-POINTER-OUTPUT-STREAM * ~~#122~~ gcc 11 can't build cmucl + * ~~#124~~ directory with `:wild-inferiors` doesn't descend subdirectories * ~~#125~~ Linux `unix-stat` returning incorrect values * ~~#127~~ Linux unix-getpwuid segfaults when given non-existent uid.. * ~~#128~~ `QUIT` accepts an exit code + * ~~#130~~ Move file-author to C * ~~#132~~ Ansi test `RENAME-FILE.1` no fails * ~~#134~~ Handle the case of `(expt complex complex-rational)` * ~~#136~~ `ensure-directories-exist` should return the given pathspec + * #139 `*default-external-format*` defaults to `:utf-8` + * ~~#141~~ Disallow locales that are pathnames to a localedef file * ~~#142~~ `(random 0)` signals incorrect error + * ~~#147~~ `stream-line-column` method missing for `fundamental-character-output-stream` + * ~~#149~~ Call setlocale(3C) on startup * Other changes: * Improvements to the PCL implementation of CLOS: * Changes to building procedure:
===================================== src/i18n/locale/cmucl-unix.pot ===================================== @@ -1424,3 +1424,14 @@ msgid "" " doesn't work." msgstr ""
+#: src/code/unix.lisp +msgid "Call setlocale(3c) with fixed args. Returns 0 on success." +msgstr "" + +#: src/code/unix.lisp +msgid "" +"Get the current locale. If we can't, return NIL. A call to\n" +" UNIX-SETLOCALE must have been done previously before calling this so\n" +" that the correct locale is returned." +msgstr "" +
===================================== src/lisp/os-common.c ===================================== @@ -7,6 +7,7 @@
#include <assert.h> #include <errno.h> +#include <locale.h> #include <math.h> #include <netdb.h> #include <pwd.h> @@ -773,3 +774,25 @@ exit:
return result; } + +int +os_setlocale(void) +{ + char *result = setlocale(LC_ALL, ""); + + /* Return 0 if setlocale suceeded; otherwise -1. */ + return result != NULL ? 0 : -1; +} + +int +os_getlocale(char *buf, int len) +{ + char *locale = setlocale(LC_ALL, NULL); + if (locale) { + strncpy(buf, locale, len - 1); + buf[len - 1] = '\0'; + } + + /* Return -1 if setlocale failed. */ + return locale ? 0 : -1; +}
===================================== src/pcl/gray-streams.lisp ===================================== @@ -235,6 +235,9 @@ defined for this function, although it is permissible for it to always return NIL."))
+(defmethod stream-line-column ((stream fundamental-character-output-stream)) + nil) + ;;; Stream-line-length is a CMUCL extension to Gray streams. (defgeneric stream-line-length (stream) (:documentation _N"Return the stream line length or Nil."))
===================================== src/pcl/simple-streams/external-formats/aliases ===================================== @@ -223,6 +223,8 @@ windows-cp1252 cp1252 windows-latin1 cp1252 ms-ansi cp1252
+euckr euc-kr +cp949 euc-kr ;; These are not yet implemented ;;iso-2022-jp iso2022-jp ;;iso2022jp iso2022-jp
===================================== tests/.gitignore ===================================== @@ -0,0 +1 @@ +/out-utf8.txt
===================================== tests/issues.lisp ===================================== @@ -5,6 +5,12 @@
(in-package "ISSUES-TESTS")
+(defparameter *test-path* + (merge-pathnames (make-pathname :name :unspecific :type :unspecific + :version :unspecific) + *load-truename*) + "Path to where this file is.") + (defun square (x) (expt x 2))
@@ -676,4 +682,73 @@ ;; work and not return NIL. (assert-true (file-author ".")) (assert-true (file-author "bin/build.sh")) - (assert-true (file-author "tests/안녕하십니까.txt"))) + (let ((unix::*filename-encoding* :utf-8)) + ;; Set filename encoding to utf-8 so that we can encode the + ;; filename properly. + (assert-true + (file-author + (merge-pathnames + (concatenate 'string + ;; Write the test file name this way so + ;; that it's independent of the encoding + ;; used to load this file. The name is + ;; "안녕하십니까". + '(#\Hangul_Syllable_An #\Hangul_Syllable_Nyeong #\Hangul_Syllable_Ha + #\Hangul_Syllable_Sib #\Hangul_Syllable_Ni #\Hangul_Syllable_Gga) + ".txt") + *test-path*))))) + +(define-test issue.139-default-external-format + (:tag :issues) + (assert-eq :utf-8 stream:*default-external-format*)) + +(define-test issue.139-default-external-format-read-file + (:tag :issues) + (let ((string (concatenate 'string + ;; This is "hello" in Korean + '(#\Hangul_syllable_an + #\Hangul_Syllable_Nyeong + #\Hangul_Syllable_Ha + #\Hangul_Syllable_Se + #\Hangul_Syllable_Yo)))) + ;; Test that opening a file for reading uses the the default :utf8 + ;; encoding. + (with-open-file (s (merge-pathnames "utf8.txt" + *test-path*) + :direction :input) + ;; The first line should be "hello" in Hangul. + (assert-equal (map 'list #'char-name string) + (map 'list #'char-name (read-line s)))))) + +(define-test issue.139-default-external-format-write-file + (:tag :issues) + ;; Test that opening a file for writing uses the default :utf8. + ;; First write something out to the file. Then read it back in + ;; using an explicit format of utf8 and verifying that we got the + ;; right contents. 
+ (let ((string (concatenate 'string + ;; This is "hello" in Korean + '(#\Hangul_syllable_an + #\Hangul_Syllable_Nyeong + #\Hangul_Syllable_Ha + #\Hangul_Syllable_Se + #\Hangul_Syllable_Yo)))) + (with-open-file (s (merge-pathnames "out-utf8.txt" + *test-path*) + :direction :output + :if-exists :supersede) + (write-line string s)) + (with-open-file (s (merge-pathnames "out-utf8.txt" + *test-path*) + :direction :input + :external-format :utf-8) + (assert-equal (map 'list #'char-name string) + (map 'list #'char-name (read-line s)))))) + + +(define-test issue.150 + (:tag :issues) + (let ((ext:*gc-verbose* nil) + (*compile-print* nil)) + (assert-true (stream::find-external-format :euckr)) + (assert-true (stream::find-external-format :cp949))))
===================================== tests/utf8.txt ===================================== @@ -0,0 +1,2 @@ +안녕하세요 +UTF8 test. The above line is "Hello" in Hangul.
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6e975c79c794bb61d18fa0b...