Raymond Toy pushed to branch issue-141-locale at cmucl / cmucl

Commits:

12 changed files:

Changes:

  • src/code/extfmts.lisp
    ... ... @@ -22,7 +22,7 @@
    22 22
     	  describe-external-format))
    
    23 23
     
    
    24 24
     (defvar *default-external-format*
    
    25
    -  :iso8859-1
    
    25
    +  :utf-8
    
    26 26
       "The default external format to use if no other external format is
    
    27 27
       specified")
    
    28 28
     
    

  • src/code/intl.lisp
    ... ... @@ -105,7 +105,7 @@
    105 105
     
    
    106 106
     (defun find-encoding (domain)
    
    107 107
       (when (null (domain-entry-encoding domain))
    
    108
    -    (setf (domain-entry-encoding domain) :iso-8859-1)
    
    108
    +    (setf (domain-entry-encoding domain) :iso8859-1)
    
    109 109
         ;; Domain lookup can call the compiler, so set the locale to "C"
    
    110 110
         ;; so things work.
    
    111 111
         (let* ((*locale* "C")
    
    ... ... @@ -519,18 +519,8 @@
    519 519
         (if (equal val "") nil val)))
    
    520 520
     
    
    521 521
     (defun setlocale (&optional locale)
    
    522
    -  (let ((env-locale (or locale
    
    523
    -			(getenv "LANGUAGE")
    
    524
    -			(getenv "LC_ALL")
    
    525
    -			(getenv "LC_MESSAGES")
    
    526
    -			(getenv "LANG"))))
    
    527
    -    (cond
    
    528
    -      ((and (plusp (length env-locale))
    
    529
    -	    (char-equal #\/ (aref env-locale 0)))
    
    530
    -       (warn "Locale not changed due to unsupported locale: ~S" env-locale))
    
    531
    -      (t
    
    532
    -       (setf *locale* (or env-locale
    
    533
    -			  *locale*))))))
    
    522
    +  (setf *locale* (or (unix::unix-getlocale)
    
    523
    +		     *locale*)))
    
    534 524
     
    
    535 525
     (defmacro textdomain (domain)
    
    536 526
       `(eval-when (:compile-toplevel :execute)
    

  • src/code/save.lisp
    ... ... @@ -249,6 +249,10 @@
    249 249
     	     (reinit)
    
    250 250
     	     (environment-init)
    
    251 251
     	     (dolist (f *after-save-initializations*) (funcall f))
    
    252
    +	     ;; Set the runtime locale
    
    253
    +	     (unless (zerop (unix::unix-setlocale))
    
    254
    +	       (warn "os_setlocale failed"))
    
    255
    +	     ;; Set the locale for lisp
    
    252 256
     	     (intl::setlocale)
    
    253 257
     	     (ext::process-command-strings process-command-line)
    
    254 258
     	     (setf *editor-lisp-p* nil)
    

  • src/code/unix.lisp
    ... ... @@ -2893,3 +2893,25 @@
    2893 2893
        of the child in the parent if it works, or NIL and an error number if it
    
    2894 2894
        doesn't work."
    
    2895 2895
       (int-syscall ("fork")))
    
    2896
    +
    
    2897
    +(defun unix-setlocale ()
    
    2898
    +  _N"Call setlocale(3c) with fixed args.  Returns 0 on success."
    
    2899
    +  (alien:alien-funcall
    
    2900
    +   (alien:extern-alien "os_setlocale"
    
    2901
    +		       (function c-call:int))))
    
    2902
    +
    
    2903
    +(defun unix-getlocale ()
    
    2904
    +  _N"Get the current locale.  If we can't, return NIL.  A call to
    
    2905
    +  UNIX-SETLOCALE must have been done previously before calling this so
    
    2906
    +  that the correct locale is returned."
    
    2907
    +  (with-alien ((buf (array c-call:char 256)))
    
    2908
    +    (let ((result
    
    2909
    +	    (alien-funcall
    
    2910
    +	     (extern-alien "os_getlocale"
    
    2911
    +			   (function c-call:int
    
    2912
    +				     (* c-call:char)
    
    2913
    +				     c-call:int))
    
    2914
    +	     (cast buf (* c-call:char))
    
    2915
    +	     256)))
    
    2916
    +      (when (zerop result)
    
    2917
    +	(cast buf c-call:c-string)))))

  • src/general-info/release-21e.md
    ... ... @@ -22,6 +22,7 @@ public domain.
    22 22
       * Feature enhancements
    
    23 23
       * Changes
    
    24 24
         * Update to ASDF 3.3.6
    
    25
    +    * The default external format is `:utf-8` instead of `:iso8859-1`
    
    25 26
       * ANSI compliance fixes:
    
    26 27
       * Bug fixes:
    
    27 28
         * ~~#97~~ Fixes stepping through the source forms in the debugger.  This has been broken for quite some time, but it works now.
    
    ... ... @@ -50,13 +51,19 @@ public domain.
    50 51
         * ~~#113~~ REQUIRE on contribs can pull in the wrong things via ASDF.
    
    51 52
         * ~~#121~~ Wrong column index in FILL-POINTER-OUTPUT-STREAM
    
    52 53
         * ~~#122~~ gcc 11 can't build cmucl
    
    54
    +    * ~~#124~~ directory with `:wild-inferiors` doesn't descend subdirectories 
    
    53 55
         * ~~#125~~ Linux `unix-stat` returning incorrect values
    
    54 56
         * ~~#127~~ Linux unix-getpwuid segfaults when given non-existent uid.
    
    55 57
         * ~~#128~~ `QUIT` accepts an exit code
    
    58
    +    * ~~#130~~ Move file-author to C 
    
    56 59
         * ~~#132~~ Ansi test `RENAME-FILE.1` no longer fails
    
    57 60
         * ~~#134~~ Handle the case of `(expt complex complex-rational)`
    
    58 61
         * ~~#136~~ `ensure-directories-exist` should return the given pathspec
    
    62
    +    * #139 `*default-external-format*` defaults to `:utf-8`
    
    63
    +    * ~~#141~~ Disallow locales that are pathnames to a localedef file
    
    59 64
         * ~~#142~~ `(random 0)` signals incorrect error
    
    65
    +    * ~~#147~~ `stream-line-column` method missing for `fundamental-character-output-stream`
    
    66
    +    * ~~#149~~ Call setlocale(3C) on startup
    
    60 67
       * Other changes:
    
    61 68
       * Improvements to the PCL implementation of CLOS:
    
    62 69
       * Changes to building procedure:
    

  • src/i18n/locale/cmucl-unix.pot
    ... ... @@ -1424,3 +1424,14 @@ msgid ""
    1424 1424
     "   doesn't work."
    
    1425 1425
     msgstr ""
    
    1426 1426
     
    
    1427
    +#: src/code/unix.lisp
    
    1428
    +msgid "Call setlocale(3c) with fixed args.  Returns 0 on success."
    
    1429
    +msgstr ""
    
    1430
    +
    
    1431
    +#: src/code/unix.lisp
    
    1432
    +msgid ""
    
    1433
    +"Get the current locale.  If we can't, return NIL.  A call to\n"
    
    1434
    +"  UNIX-SETLOCALE must have been done previously before calling this so\n"
    
    1435
    +"  that the correct locale is returned."
    
    1436
    +msgstr ""
    
    1437
    +

  • src/lisp/os-common.c
    ... ... @@ -7,6 +7,7 @@
    7 7
     
    
    8 8
     #include <assert.h>
    
    9 9
     #include <errno.h>
    
    10
    +#include <locale.h>
    
    10 11
     #include <math.h>
    
    11 12
     #include <netdb.h>
    
    12 13
     #include <pwd.h>
    
    ... ... @@ -773,3 +774,25 @@ exit:
    773 774
         
    
    774 775
         return result;
    
    775 776
     }
    
    777
    +
    
    778
    +int
    
    779
    +os_setlocale(void)
    
    780
    +{
    
    781
    +    char *result = setlocale(LC_ALL, "");
    
    782
    +
    
    783
    +    /* Return 0 if setlocale succeeded; otherwise -1. */
    
    784
    +    return result != NULL ? 0 : -1;
    
    785
    +}
    
    786
    +
    
    787
    +int
    
    788
    +os_getlocale(char *buf, int len)
    
    789
    +{
    
    790
    +    char *locale = setlocale(LC_ALL, NULL);
    
    791
    +    if (locale) {
    
    792
    +        strncpy(buf, locale, len - 1);
    
    793
    +        buf[len - 1] = '\0';
    
    794
    +    }
    
    795
    +
    
    796
    +    /* Return -1 if setlocale failed. */
    
    797
    +    return locale ? 0 : -1;
    
    798
    +}

  • src/pcl/gray-streams.lisp
    ... ... @@ -235,6 +235,9 @@
    235 235
       defined for this function, although it is permissible for it to
    
    236 236
       always return NIL."))
    
    237 237
     
    
    238
    +(defmethod stream-line-column ((stream fundamental-character-output-stream))
    
    239
    +  nil)
    
    240
    +
    
    238 241
     ;;; Stream-line-length is a CMUCL extension to Gray streams.
    
    239 242
     (defgeneric stream-line-length (stream)
    
    240 243
       (:documentation _N"Return the stream line length or Nil."))
    

  • src/pcl/simple-streams/external-formats/aliases
    ... ... @@ -223,6 +223,8 @@ windows-cp1252 cp1252
    223 223
     windows-latin1	cp1252
    
    224 224
     ms-ansi		cp1252
    
    225 225
     
    
    226
    +euckr		euc-kr
    
    227
    +cp949		euc-kr
    
    226 228
     ;; These are not yet implemented
    
    227 229
     ;;iso-2022-jp	iso2022-jp
    
    228 230
     ;;iso2022jp	iso2022-jp
    

  • tests/.gitignore
    1
    +/out-utf8.txt

  • tests/issues.lisp
    ... ... @@ -5,6 +5,12 @@
    5 5
     
    
    6 6
     (in-package "ISSUES-TESTS")
    
    7 7
     
    
    8
    +(defparameter *test-path*
    
    9
    +  (merge-pathnames (make-pathname :name :unspecific :type :unspecific
    
    10
    +                                  :version :unspecific)
    
    11
    +                   *load-truename*)
    
    12
    +  "Path to where this file is.")
    
    13
    +
    
    8 14
     (defun square (x)
    
    9 15
       (expt x 2))
    
    10 16
     
    
    ... ... @@ -676,4 +682,73 @@
    676 682
       ;; work and not return NIL.
    
    677 683
       (assert-true (file-author "."))
    
    678 684
       (assert-true (file-author "bin/build.sh"))
    
    679
    -  (assert-true (file-author "tests/안녕하십니까.txt")))
    685
    +  (let ((unix::*filename-encoding* :utf-8))
    
    686
    +    ;; Set filename encoding to utf-8 so that we can encode the
    
    687
    +    ;; filename properly.
    
    688
    +    (assert-true
    
    689
    +   (file-author
    
    690
    +    (merge-pathnames 
    
    691
    +     (concatenate 'string
    
    692
    +		  ;; Write the test file name this way so
    
    693
    +		  ;; that it's independent of the encoding
    
    694
    +		  ;; used to load this file.  The name is
    
    695
    +		  ;; "안녕하십니까".
    
    696
    +		  '(#\Hangul_Syllable_An #\Hangul_Syllable_Nyeong #\Hangul_Syllable_Ha
    
    697
    +		    #\Hangul_Syllable_Sib #\Hangul_Syllable_Ni #\Hangul_Syllable_Gga)
    
    698
    +		  ".txt")
    
    699
    +     *test-path*)))))
    
    700
    +
    
    701
    +(define-test issue.139-default-external-format
    
    702
    +    (:tag :issues)
    
    703
    +  (assert-eq :utf-8 stream:*default-external-format*))
    
    704
    +
    
    705
    +(define-test issue.139-default-external-format-read-file
    
    706
    +    (:tag :issues)
    
    707
    +  (let ((string (concatenate 'string
    
    708
    +			     ;; This is "hello" in Korean
    
    709
    +			     '(#\Hangul_syllable_an
    
    710
    +			       #\Hangul_Syllable_Nyeong
    
    711
    +			       #\Hangul_Syllable_Ha
    
    712
    +			       #\Hangul_Syllable_Se
    
    713
    +			       #\Hangul_Syllable_Yo))))
    
    714
    +    ;; Test that opening a file for reading uses the default :utf8
    
    715
    +    ;; encoding.
    
    716
    +    (with-open-file (s (merge-pathnames "utf8.txt"
    
    717
    +					*test-path*)
    
    718
    +		       :direction :input)
    
    719
    +      ;; The first line should be "hello" in Hangul.
    
    720
    +      (assert-equal (map 'list #'char-name string)
    
    721
    +		    (map 'list #'char-name (read-line s))))))
    
    722
    +
    
    723
    +(define-test issue.139-default-external-format-write-file
    
    724
    +    (:tag :issues)
    
    725
    +  ;; Test that opening a file for writing uses the default :utf8.
    
    726
    +  ;; First write something out to the file.  Then read it back in
    
    727
    +  ;; using an explicit format of utf8 and verifying that we got the
    
    728
    +  ;; right contents.
    
    729
    +  (let ((string (concatenate 'string
    
    730
    +			     ;; This is "hello" in Korean
    
    731
    +			     '(#\Hangul_syllable_an
    
    732
    +			       #\Hangul_Syllable_Nyeong
    
    733
    +			       #\Hangul_Syllable_Ha
    
    734
    +			       #\Hangul_Syllable_Se
    
    735
    +			       #\Hangul_Syllable_Yo))))
    
    736
    +    (with-open-file (s (merge-pathnames "out-utf8.txt"
    
    737
    +					*test-path*)
    
    738
    +		       :direction :output
    
    739
    +		       :if-exists :supersede)
    
    740
    +      (write-line string s))
    
    741
    +    (with-open-file (s (merge-pathnames "out-utf8.txt"
    
    742
    +					*test-path*)
    
    743
    +		       :direction :input
    
    744
    +		       :external-format :utf-8)
    
    745
    +      (assert-equal (map 'list #'char-name string)
    
    746
    +		    (map 'list #'char-name (read-line s))))))
    
    747
    +  
    
    748
    +
    
    749
    +(define-test issue.150
    
    750
    +    (:tag :issues)
    
    751
    +  (let ((ext:*gc-verbose* nil)
    
    752
    +	(*compile-print* nil))
    
    753
    +    (assert-true (stream::find-external-format :euckr))
    
    754
    +    (assert-true (stream::find-external-format :cp949))))

  • tests/utf8.txt
    1
    +안녕하세요
    
    2
    +UTF8 test.  The above line is "Hello" in Hangul.