Raymond Toy pushed to branch issue-141-locale at cmucl / cmucl
Commits:
-
e8a0cc6c
by Raymond Toy at 2022-10-30T15:03:27+00:00
-
0dad5a1a
by Raymond Toy at 2022-10-30T15:03:28+00:00
-
1300830b
by Raymond Toy at 2022-10-31T17:12:48+00:00
-
649a4f1e
by Raymond Toy at 2022-10-31T17:12:49+00:00
-
88f6852f
by Raymond Toy at 2022-11-01T12:04:55-07:00
-
d5f1aa5e
by Raymond Toy at 2022-11-01T20:35:49+00:00
-
402c0c01
by Raymond Toy at 2022-11-02T01:00:20+00:00
-
d825aa54
by Raymond Toy at 2022-11-02T01:00:20+00:00
-
33c760fa
by Raymond Toy at 2022-11-03T04:47:09+00:00
-
317a33f8
by Raymond Toy at 2022-11-03T04:47:10+00:00
-
390f8f3f
by Raymond Toy at 2022-11-04T18:14:01-07:00
-
2c5282bf
by Raymond Toy at 2022-11-04T18:48:48-07:00
-
1dbc1061
by Raymond Toy at 2022-11-04T19:13:13-07:00
12 changed files:
- src/code/extfmts.lisp
- src/code/intl.lisp
- src/code/save.lisp
- src/code/unix.lisp
- src/general-info/release-21e.md
- src/i18n/locale/cmucl-unix.pot
- src/lisp/os-common.c
- src/pcl/gray-streams.lisp
- src/pcl/simple-streams/external-formats/aliases
- + tests/.gitignore
- tests/issues.lisp
- + tests/utf8.txt
Changes:
| ... | ... | @@ -22,7 +22,7 @@ |
| 22 | 22 | describe-external-format))
|
| 23 | 23 | |
| 24 | 24 | (defvar *default-external-format*
|
| 25 | - :iso8859-1
|
|
| 25 | + :utf-8
|
|
| 26 | 26 | "The default external format to use if no other external format is
|
| 27 | 27 | specified")
|
| 28 | 28 |
| ... | ... | @@ -105,7 +105,7 @@ |
| 105 | 105 | |
| 106 | 106 | (defun find-encoding (domain)
|
| 107 | 107 | (when (null (domain-entry-encoding domain))
|
| 108 | - (setf (domain-entry-encoding domain) :iso-8859-1)
|
|
| 108 | + (setf (domain-entry-encoding domain) :iso8859-1)
|
|
| 109 | 109 | ;; Domain lookup can call the compiler, so set the locale to "C"
|
| 110 | 110 | ;; so things work.
|
| 111 | 111 | (let* ((*locale* "C")
|
| ... | ... | @@ -519,18 +519,8 @@ |
| 519 | 519 | (if (equal val "") nil val)))
|
| 520 | 520 | |
| 521 | 521 | (defun setlocale (&optional locale)
|
| 522 | - (let ((env-locale (or locale
|
|
| 523 | - (getenv "LANGUAGE")
|
|
| 524 | - (getenv "LC_ALL")
|
|
| 525 | - (getenv "LC_MESSAGES")
|
|
| 526 | - (getenv "LANG"))))
|
|
| 527 | - (cond
|
|
| 528 | - ((and (plusp (length env-locale))
|
|
| 529 | - (char-equal #\/ (aref env-locale 0)))
|
|
| 530 | - (warn "Locale not changed due to unsupported locale: ~S" env-locale))
|
|
| 531 | - (t
|
|
| 532 | - (setf *locale* (or env-locale
|
|
| 533 | - *locale*))))))
|
|
| 522 | + (setf *locale* (or (unix::unix-getlocale)
|
|
| 523 | + *locale*)))
|
|
| 534 | 524 | |
| 535 | 525 | (defmacro textdomain (domain)
|
| 536 | 526 | `(eval-when (:compile-toplevel :execute)
|
| ... | ... | @@ -249,6 +249,10 @@ |
| 249 | 249 | (reinit)
|
| 250 | 250 | (environment-init)
|
| 251 | 251 | (dolist (f *after-save-initializations*) (funcall f))
|
| 252 | + ;; Set the runtime locale
|
|
| 253 | + (unless (zerop (unix::unix-setlocale))
|
|
| 254 | + (warn "os_setlocale failed"))
|
|
| 255 | + ;; Set the locale for lisp
|
|
| 252 | 256 | (intl::setlocale)
|
| 253 | 257 | (ext::process-command-strings process-command-line)
|
| 254 | 258 | (setf *editor-lisp-p* nil)
|
| ... | ... | @@ -2893,3 +2893,25 @@ |
| 2893 | 2893 | of the child in the parent if it works, or NIL and an error number if it
|
| 2894 | 2894 | doesn't work."
|
| 2895 | 2895 | (int-syscall ("fork")))
|
| 2896 | + |
|
| 2897 | +(defun unix-setlocale ()
|
|
| 2898 | + _N"Call setlocale(3c) with fixed args. Returns 0 on success."
|
|
| 2899 | + (alien:alien-funcall
|
|
| 2900 | + (alien:extern-alien "os_setlocale"
|
|
| 2901 | + (function c-call:int))))
|
|
| 2902 | + |
|
| 2903 | +(defun unix-getlocale ()
|
|
| 2904 | + _N"Get the current locale. If we can't, return NIL. A call to
|
|
| 2905 | + UNIX-SETLOCALE must have been done previously before calling this so
|
|
| 2906 | + that the correct locale is returned."
|
|
| 2907 | + (with-alien ((buf (array c-call:char 256)))
|
|
| 2908 | + (let ((result
|
|
| 2909 | + (alien-funcall
|
|
| 2910 | + (extern-alien "os_getlocale"
|
|
| 2911 | + (function c-call:int
|
|
| 2912 | + (* c-call:char)
|
|
| 2913 | + c-call:int))
|
|
| 2914 | + (cast buf (* c-call:char))
|
|
| 2915 | + 256)))
|
|
| 2916 | + (when (zerop result)
|
|
| 2917 | + (cast buf c-call:c-string))))) |
| ... | ... | @@ -22,6 +22,7 @@ public domain. |
| 22 | 22 | * Feature enhancements
|
| 23 | 23 | * Changes
|
| 24 | 24 | * Update to ASDF 3.3.6
|
| 25 | + * The default external format is `:utf-8` instead of `:iso8859-1`
|
|
| 25 | 26 | * ANSI compliance fixes:
|
| 26 | 27 | * Bug fixes:
|
| 27 | 28 | * ~~#97~~ Fixes stepping through the source forms in the debugger. This has been broken for quite some time, but it works now.
|
| ... | ... | @@ -50,13 +51,19 @@ public domain. |
| 50 | 51 | * ~~#113~~ REQUIRE on contribs can pull in the wrong things via ASDF.
|
| 51 | 52 | * ~~#121~~ Wrong column index in FILL-POINTER-OUTPUT-STREAM
|
| 52 | 53 | * ~~#122~~ gcc 11 can't build cmucl
|
| 54 | + * ~~#124~~ directory with `:wild-inferiors` doesn't descend subdirectories
|
|
| 53 | 55 | * ~~#125~~ Linux `unix-stat` returning incorrect values
|
| 54 | 56 | * ~~#127~~ Linux unix-getpwuid segfaults when given non-existent uid.
|
| 55 | 57 | * ~~#128~~ `QUIT` accepts an exit code
|
| 58 | + * ~~#130~~ Move file-author to C
|
|
| 56 | 59 | * ~~#132~~ Ansi test `RENAME-FILE.1` no longer fails
|
| 57 | 60 | * ~~#134~~ Handle the case of `(expt complex complex-rational)`
|
| 58 | 61 | * ~~#136~~ `ensure-directories-exist` should return the given pathspec
|
| 62 | + * #139 `*default-external-format*` defaults to `:utf-8`
|
|
| 63 | + * ~~#141~~ Disallow locales that are pathnames to a localedef file
|
|
| 59 | 64 | * ~~#142~~ `(random 0)` signals incorrect error
|
| 65 | + * ~~#147~~ `stream-line-column` method missing for `fundamental-character-output-stream`
|
|
| 66 | + * ~~#149~~ Call setlocale(3C) on startup
|
|
| 60 | 67 | * Other changes:
|
| 61 | 68 | * Improvements to the PCL implementation of CLOS:
|
| 62 | 69 | * Changes to building procedure:
|
| ... | ... | @@ -1424,3 +1424,14 @@ msgid "" |
| 1424 | 1424 | " doesn't work."
|
| 1425 | 1425 | msgstr ""
|
| 1426 | 1426 | |
| 1427 | +#: src/code/unix.lisp
|
|
| 1428 | +msgid "Call setlocale(3c) with fixed args. Returns 0 on success."
|
|
| 1429 | +msgstr ""
|
|
| 1430 | + |
|
| 1431 | +#: src/code/unix.lisp
|
|
| 1432 | +msgid ""
|
|
| 1433 | +"Get the current locale. If we can't, return NIL. A call to\n"
|
|
| 1434 | +" UNIX-SETLOCALE must have been done previously before calling this so\n"
|
|
| 1435 | +" that the correct locale is returned."
|
|
| 1436 | +msgstr ""
|
|
| 1437 | + |
| ... | ... | @@ -7,6 +7,7 @@ |
| 7 | 7 | |
| 8 | 8 | #include <assert.h>
|
| 9 | 9 | #include <errno.h>
|
| 10 | +#include <locale.h>
|
|
| 10 | 11 | #include <math.h>
|
| 11 | 12 | #include <netdb.h>
|
| 12 | 13 | #include <pwd.h>
|
| ... | ... | @@ -773,3 +774,25 @@ exit: |
| 773 | 774 |
|
| 774 | 775 | return result;
|
| 775 | 776 | }
|
| 777 | + |
|
| 778 | +int
|
|
| 779 | +os_setlocale(void)
|
|
| 780 | +{
|
|
| 781 | + char *result = setlocale(LC_ALL, "");
|
|
| 782 | + |
|
| 783 | + /* Return 0 if setlocale succeeded; otherwise -1. */
|
|
| 784 | + return result != NULL ? 0 : -1;
|
|
| 785 | +}
|
|
| 786 | + |
|
| 787 | +int
|
|
| 788 | +os_getlocale(char *buf, int len)
|
|
| 789 | +{
|
|
| 790 | + char *locale = setlocale(LC_ALL, NULL);
|
|
| 791 | + if (locale) {
|
|
| 792 | + strncpy(buf, locale, len - 1);
|
|
| 793 | + buf[len - 1] = '\0';
|
|
| 794 | + }
|
|
| 795 | + |
|
| 796 | + /* Return 0 on success, or -1 if setlocale failed. */
|
|
| 797 | + return locale ? 0 : -1;
|
|
| 798 | +} |
| ... | ... | @@ -235,6 +235,9 @@ |
| 235 | 235 | defined for this function, although it is permissible for it to
|
| 236 | 236 | always return NIL."))
|
| 237 | 237 | |
| 238 | +(defmethod stream-line-column ((stream fundamental-character-output-stream))
|
|
| 239 | + nil)
|
|
| 240 | + |
|
| 238 | 241 | ;;; Stream-line-length is a CMUCL extension to Gray streams.
|
| 239 | 242 | (defgeneric stream-line-length (stream)
|
| 240 | 243 | (:documentation _N"Return the stream line length or Nil."))
|
| ... | ... | @@ -223,6 +223,8 @@ windows-cp1252 cp1252 |
| 223 | 223 | windows-latin1 cp1252
|
| 224 | 224 | ms-ansi cp1252
|
| 225 | 225 | |
| 226 | +euckr euc-kr
|
|
| 227 | +cp949 euc-kr
|
|
| 226 | 228 | ;; These are not yet implemented
|
| 227 | 229 | ;;iso-2022-jp iso2022-jp
|
| 228 | 230 | ;;iso2022jp iso2022-jp
|
| 1 | +/out-utf8.txt |
| ... | ... | @@ -5,6 +5,12 @@ |
| 5 | 5 | |
| 6 | 6 | (in-package "ISSUES-TESTS")
|
| 7 | 7 | |
| 8 | +(defparameter *test-path*
|
|
| 9 | + (merge-pathnames (make-pathname :name :unspecific :type :unspecific
|
|
| 10 | + :version :unspecific)
|
|
| 11 | + *load-truename*)
|
|
| 12 | + "Path to where this file is.")
|
|
| 13 | + |
|
| 8 | 14 | (defun square (x)
|
| 9 | 15 | (expt x 2))
|
| 10 | 16 | |
| ... | ... | @@ -676,4 +682,73 @@ |
| 676 | 682 | ;; work and not return NIL.
|
| 677 | 683 | (assert-true (file-author "."))
|
| 678 | 684 | (assert-true (file-author "bin/build.sh"))
|
| 679 | - (assert-true (file-author "tests/안녕하십니까.txt"))) |
|
| 685 | + (let ((unix::*filename-encoding* :utf-8))
|
|
| 686 | + ;; Set filename encoding to utf-8 so that we can encode the
|
|
| 687 | + ;; filename properly.
|
|
| 688 | + (assert-true
|
|
| 689 | + (file-author
|
|
| 690 | + (merge-pathnames
|
|
| 691 | + (concatenate 'string
|
|
| 692 | + ;; Write the test file name this way so
|
|
| 693 | + ;; that it's independent of the encoding
|
|
| 694 | + ;; used to load this file. The name is
|
|
| 695 | + ;; "안녕하십니까".
|
|
| 696 | + '(#\Hangul_Syllable_An #\Hangul_Syllable_Nyeong #\Hangul_Syllable_Ha
|
|
| 697 | + #\Hangul_Syllable_Sib #\Hangul_Syllable_Ni #\Hangul_Syllable_Gga)
|
|
| 698 | + ".txt")
|
|
| 699 | + *test-path*)))))
|
|
| 700 | + |
|
| 701 | +(define-test issue.139-default-external-format
|
|
| 702 | + (:tag :issues)
|
|
| 703 | + (assert-eq :utf-8 stream:*default-external-format*))
|
|
| 704 | + |
|
| 705 | +(define-test issue.139-default-external-format-read-file
|
|
| 706 | + (:tag :issues)
|
|
| 707 | + (let ((string (concatenate 'string
|
|
| 708 | + ;; This is "hello" in Korean
|
|
| 709 | + '(#\Hangul_syllable_an
|
|
| 710 | + #\Hangul_Syllable_Nyeong
|
|
| 711 | + #\Hangul_Syllable_Ha
|
|
| 712 | + #\Hangul_Syllable_Se
|
|
| 713 | + #\Hangul_Syllable_Yo))))
|
|
| 714 | + ;; Test that opening a file for reading uses the default :utf8
|
|
| 715 | + ;; encoding.
|
|
| 716 | + (with-open-file (s (merge-pathnames "utf8.txt"
|
|
| 717 | + *test-path*)
|
|
| 718 | + :direction :input)
|
|
| 719 | + ;; The first line should be "hello" in Hangul.
|
|
| 720 | + (assert-equal (map 'list #'char-name string)
|
|
| 721 | + (map 'list #'char-name (read-line s))))))
|
|
| 722 | + |
|
| 723 | +(define-test issue.139-default-external-format-write-file
|
|
| 724 | + (:tag :issues)
|
|
| 725 | + ;; Test that opening a file for writing uses the default :utf8.
|
|
| 726 | + ;; First write something out to the file. Then read it back in
|
|
| 727 | + ;; using an explicit format of utf8 and verifying that we got the
|
|
| 728 | + ;; right contents.
|
|
| 729 | + (let ((string (concatenate 'string
|
|
| 730 | + ;; This is "hello" in Korean
|
|
| 731 | + '(#\Hangul_syllable_an
|
|
| 732 | + #\Hangul_Syllable_Nyeong
|
|
| 733 | + #\Hangul_Syllable_Ha
|
|
| 734 | + #\Hangul_Syllable_Se
|
|
| 735 | + #\Hangul_Syllable_Yo))))
|
|
| 736 | + (with-open-file (s (merge-pathnames "out-utf8.txt"
|
|
| 737 | + *test-path*)
|
|
| 738 | + :direction :output
|
|
| 739 | + :if-exists :supersede)
|
|
| 740 | + (write-line string s))
|
|
| 741 | + (with-open-file (s (merge-pathnames "out-utf8.txt"
|
|
| 742 | + *test-path*)
|
|
| 743 | + :direction :input
|
|
| 744 | + :external-format :utf-8)
|
|
| 745 | + (assert-equal (map 'list #'char-name string)
|
|
| 746 | + (map 'list #'char-name (read-line s))))))
|
|
| 747 | +
|
|
| 748 | + |
|
| 749 | +(define-test issue.150
|
|
| 750 | + (:tag :issues)
|
|
| 751 | + (let ((ext:*gc-verbose* nil)
|
|
| 752 | + (*compile-print* nil))
|
|
| 753 | + (assert-true (stream::find-external-format :euckr))
|
|
| 754 | + (assert-true (stream::find-external-format :cp949)))) |
| 1 | +안녕하세요
|
|
| 2 | +UTF8 test. The above line is "Hello" in Hangul. |