
Raymond Toy pushed to branch issue-141-locale at cmucl / cmucl Commits: e8a0cc6c by Raymond Toy at 2022-10-30T15:03:27+00:00 Fix #147: Add method for stream-line-column - - - - - 0dad5a1a by Raymond Toy at 2022-10-30T15:03:28+00:00 Merge branch 'issue-147-stream-line-column-impl' into 'master' Fix #147: Add method for stream-line-column Closes #147 See merge request cmucl/cmucl!104 - - - - - 1300830b by Raymond Toy at 2022-10-31T17:12:48+00:00 Address #139: *default-external-format* is :utf-8 - - - - - 649a4f1e by Raymond Toy at 2022-10-31T17:12:49+00:00 Merge branch 'issue-139-default-external-format-utf8' into 'master' Address #139: *default-external-format* is :utf-8 See merge request cmucl/cmucl!103 - - - - - 88f6852f by Raymond Toy at 2022-11-01T12:04:55-07:00 Change :iso-8859-1 to :iso8859-1 in find-encoding While there's an alias for `:iso-8859-1`, it's safer to use `:iso8859-1` which is builtin. Using `:iso-8859-1` requires the alias database to be loaded, which isn't (currently) guaranteed when `find-encoding` is called. Thus use the builtin name instead. Besides, `:iso8859-1` is used in other places in "intl.lisp". (This is hard to test, but I noticed it when running ``` LANG=ko_KR.utf8 lisp ``` on the branch `issue-139-add-alias-local-external-format`.) - - - - - d5f1aa5e by Raymond Toy at 2022-11-01T20:35:49+00:00 Update release-21e.md with closed issues. 
- - - - - 402c0c01 by Raymond Toy at 2022-11-02T01:00:20+00:00 Fix #150: add aliases cp949 euckr - - - - - d825aa54 by Raymond Toy at 2022-11-02T01:00:20+00:00 Merge branch 'issue-150-add-aliases-cp949-euckr' into 'master' Fix #150: add aliases cp949 euckr Closes #150 See merge request cmucl/cmucl!106 - - - - - 33c760fa by Raymond Toy at 2022-11-03T04:47:09+00:00 Fix #149: Call setlocale(3C) on startup - - - - - 317a33f8 by Raymond Toy at 2022-11-03T04:47:10+00:00 Merge branch 'issue-149-add-setlocale' into 'master' Fix #149: Call setlocale(3C) on startup Closes #149 See merge request cmucl/cmucl!105 - - - - - 390f8f3f by Raymond Toy at 2022-11-04T18:14:01-07:00 Update release notes - - - - - 2c5282bf by Raymond Toy at 2022-11-04T18:48:48-07:00 Merge branch 'master' into issue-141-locale - - - - - 1dbc1061 by Raymond Toy at 2022-11-04T19:13:13-07:00 Implement unix-getlocale and use it * lisp/os-common.c * Implement os_getlocale to get the current locale via setlocale(3C) * code/unix.lisp * Define function unix-getlocale to call os_getlocale * code/intl.lisp * Use unix-getlocale to get the locale instead of getting the different environment variables. 
* i18n/locale/cmucl-unix.pot * Update because of the new docstring - - - - - 12 changed files: - src/code/extfmts.lisp - src/code/intl.lisp - src/code/save.lisp - src/code/unix.lisp - src/general-info/release-21e.md - src/i18n/locale/cmucl-unix.pot - src/lisp/os-common.c - src/pcl/gray-streams.lisp - src/pcl/simple-streams/external-formats/aliases - + tests/.gitignore - tests/issues.lisp - + tests/utf8.txt Changes: ===================================== src/code/extfmts.lisp ===================================== @@ -22,7 +22,7 @@ describe-external-format)) (defvar *default-external-format* - :iso8859-1 + :utf-8 "The default external format to use if no other external format is specified") ===================================== src/code/intl.lisp ===================================== @@ -105,7 +105,7 @@ (defun find-encoding (domain) (when (null (domain-entry-encoding domain)) - (setf (domain-entry-encoding domain) :iso-8859-1) + (setf (domain-entry-encoding domain) :iso8859-1) ;; Domain lookup can call the compiler, so set the locale to "C" ;; so things work. 
(let* ((*locale* "C") @@ -519,18 +519,8 @@ (if (equal val "") nil val))) (defun setlocale (&optional locale) - (let ((env-locale (or locale - (getenv "LANGUAGE") - (getenv "LC_ALL") - (getenv "LC_MESSAGES") - (getenv "LANG")))) - (cond - ((and (plusp (length env-locale)) - (char-equal #\/ (aref env-locale 0))) - (warn "Locale not changed due to unsupported locale: ~S" env-locale)) - (t - (setf *locale* (or env-locale - *locale*)))))) + (setf *locale* (or (unix::unix-getlocale) + *locale*))) (defmacro textdomain (domain) `(eval-when (:compile-toplevel :execute) ===================================== src/code/save.lisp ===================================== @@ -249,6 +249,10 @@ (reinit) (environment-init) (dolist (f *after-save-initializations*) (funcall f)) + ;; Set the runtime locale + (unless (zerop (unix::unix-setlocale)) + (warn "os_setlocale failed")) + ;; Set the locale for lisp (intl::setlocale) (ext::process-command-strings process-command-line) (setf *editor-lisp-p* nil) ===================================== src/code/unix.lisp ===================================== @@ -2893,3 +2893,25 @@ of the child in the parent if it works, or NIL and an error number if it doesn't work." (int-syscall ("fork"))) + +(defun unix-setlocale () + _N"Call setlocale(3c) with fixed args. Returns 0 on success." + (alien:alien-funcall + (alien:extern-alien "os_setlocale" + (function c-call:int)))) + +(defun unix-getlocale () + _N"Get the current locale. If we can't, return NIL. A call to + UNIX-SETLOCALE must have been done previously before calling this so + that the correct locale is returned." 
+ (with-alien ((buf (array c-call:char 256))) + (let ((result + (alien-funcall + (extern-alien "os_getlocale" + (function c-call:int + (* c-call:char) + c-call:int)) + (cast buf (* c-call:char)) + 256))) + (when (zerop result) + (cast buf c-call:c-string))))) ===================================== src/general-info/release-21e.md ===================================== @@ -22,6 +22,7 @@ public domain. * Feature enhancements * Changes * Update to ASDF 3.3.6 + * The default external format is `:utf-8` instead of `:iso8859-1` * ANSI compliance fixes: * Bug fixes: * ~~#97~~ Fixes stepping through the source forms in the debugger. This has been broken for quite some time, but it works now. @@ -50,13 +51,19 @@ public domain. * ~~#113~~ REQUIRE on contribs can pull in the wrong things via ASDF.. * ~~#121~~ Wrong column index in FILL-POINTER-OUTPUT-STREAM * ~~#122~~ gcc 11 can't build cmucl + * ~~#124~~ directory with `:wild-inferiors` doesn't descend subdirectories * ~~#125~~ Linux `unix-stat` returning incorrect values * ~~#127~~ Linux unix-getpwuid segfaults when given non-existent uid.. * ~~#128~~ `QUIT` accepts an exit code + * ~~#130~~ Move file-author to C * ~~#132~~ Ansi test `RENAME-FILE.1` no fails * ~~#134~~ Handle the case of `(expt complex complex-rational)` * ~~#136~~ `ensure-directories-exist` should return the given pathspec + * #139 `*default-external-format*` defaults to `:utf-8` + * ~~#141~~ Disallow locales that are pathnames to a localedef file * ~~#142~~ `(random 0)` signals incorrect error + * ~~#147~~ `stream-line-column` method missing for `fundamental-character-output-stream` + * ~~#149~~ Call setlocale(3C) on startup * Other changes: * Improvements to the PCL implementation of CLOS: * Changes to building procedure: ===================================== src/i18n/locale/cmucl-unix.pot ===================================== @@ -1424,3 +1424,14 @@ msgid "" " doesn't work." msgstr "" +#: src/code/unix.lisp +msgid "Call setlocale(3c) with fixed args. 
Returns 0 on success." +msgstr "" + +#: src/code/unix.lisp +msgid "" +"Get the current locale. If we can't, return NIL. A call to\n" +" UNIX-SETLOCALE must have been done previously before calling this so\n" +" that the correct locale is returned." +msgstr "" + ===================================== src/lisp/os-common.c ===================================== @@ -7,6 +7,7 @@ #include <assert.h> #include <errno.h> +#include <locale.h> #include <math.h> #include <netdb.h> #include <pwd.h> @@ -773,3 +774,25 @@ exit: return result; } + +int +os_setlocale(void) +{ + char *result = setlocale(LC_ALL, ""); + + /* Return 0 if setlocale suceeded; otherwise -1. */ + return result != NULL ? 0 : -1; +} + +int +os_getlocale(char *buf, int len) +{ + char *locale = setlocale(LC_ALL, NULL); + if (locale) { + strncpy(buf, locale, len - 1); + buf[len - 1] = '\0'; + } + + /* Return -1 if setlocale failed. */ + return locale ? 0 : -1; +} ===================================== src/pcl/gray-streams.lisp ===================================== @@ -235,6 +235,9 @@ defined for this function, although it is permissible for it to always return NIL.")) +(defmethod stream-line-column ((stream fundamental-character-output-stream)) + nil) + ;;; Stream-line-length is a CMUCL extension to Gray streams. 
(defgeneric stream-line-length (stream) (:documentation _N"Return the stream line length or Nil.")) ===================================== src/pcl/simple-streams/external-formats/aliases ===================================== @@ -223,6 +223,8 @@ windows-cp1252 cp1252 windows-latin1 cp1252 ms-ansi cp1252 +euckr euc-kr +cp949 euc-kr ;; These are not yet implemented ;;iso-2022-jp iso2022-jp ;;iso2022jp iso2022-jp ===================================== tests/.gitignore ===================================== @@ -0,0 +1 @@ +/out-utf8.txt ===================================== tests/issues.lisp ===================================== @@ -5,6 +5,12 @@ (in-package "ISSUES-TESTS") +(defparameter *test-path* + (merge-pathnames (make-pathname :name :unspecific :type :unspecific + :version :unspecific) + *load-truename*) + "Path to where this file is.") + (defun square (x) (expt x 2)) @@ -676,4 +682,73 @@ ;; work and not return NIL. (assert-true (file-author ".")) (assert-true (file-author "bin/build.sh")) - (assert-true (file-author "tests/안녕하십니까.txt"))) + (let ((unix::*filename-encoding* :utf-8)) + ;; Set filename encoding to utf-8 so that we can encode the + ;; filename properly. + (assert-true + (file-author + (merge-pathnames + (concatenate 'string + ;; Write the test file name this way so + ;; that it's independent of the encoding + ;; used to load this file. The name is + ;; "안녕하십니까". 
+ '(#\Hangul_Syllable_An #\Hangul_Syllable_Nyeong #\Hangul_Syllable_Ha + #\Hangul_Syllable_Sib #\Hangul_Syllable_Ni #\Hangul_Syllable_Gga) + ".txt") + *test-path*))))) + +(define-test issue.139-default-external-format + (:tag :issues) + (assert-eq :utf-8 stream:*default-external-format*)) + +(define-test issue.139-default-external-format-read-file + (:tag :issues) + (let ((string (concatenate 'string + ;; This is "hello" in Korean + '(#\Hangul_syllable_an + #\Hangul_Syllable_Nyeong + #\Hangul_Syllable_Ha + #\Hangul_Syllable_Se + #\Hangul_Syllable_Yo)))) + ;; Test that opening a file for reading uses the the default :utf8 + ;; encoding. + (with-open-file (s (merge-pathnames "utf8.txt" + *test-path*) + :direction :input) + ;; The first line should be "hello" in Hangul. + (assert-equal (map 'list #'char-name string) + (map 'list #'char-name (read-line s)))))) + +(define-test issue.139-default-external-format-write-file + (:tag :issues) + ;; Test that opening a file for writing uses the default :utf8. + ;; First write something out to the file. Then read it back in + ;; using an explicit format of utf8 and verifying that we got the + ;; right contents. 
+ (let ((string (concatenate 'string + ;; This is "hello" in Korean + '(#\Hangul_syllable_an + #\Hangul_Syllable_Nyeong + #\Hangul_Syllable_Ha + #\Hangul_Syllable_Se + #\Hangul_Syllable_Yo)))) + (with-open-file (s (merge-pathnames "out-utf8.txt" + *test-path*) + :direction :output + :if-exists :supersede) + (write-line string s)) + (with-open-file (s (merge-pathnames "out-utf8.txt" + *test-path*) + :direction :input + :external-format :utf-8) + (assert-equal (map 'list #'char-name string) + (map 'list #'char-name (read-line s)))))) + + +(define-test issue.150 + (:tag :issues) + (let ((ext:*gc-verbose* nil) + (*compile-print* nil)) + (assert-true (stream::find-external-format :euckr)) + (assert-true (stream::find-external-format :cp949)))) ===================================== tests/utf8.txt ===================================== @@ -0,0 +1,2 @@ +안녕하세요 +UTF8 test. The above line is "Hello" in Hangul. View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6e975c79c794bb61d18fa0b... -- View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6e975c79c794bb61d18fa0b... You're receiving this email because of your account on gitlab.common-lisp.net.
participants (1)
-
Raymond Toy (@rtoy)