Raymond Toy pushed to branch master at cmucl / cmucl
Commits: 1300830b by Raymond Toy at 2022-10-31T17:12:48+00:00 Address #139: *default-external-format* is :utf-8
- - - - - 649a4f1e by Raymond Toy at 2022-10-31T17:12:49+00:00 Merge branch 'issue-139-default-external-format-utf8' into 'master'
Address #139: *default-external-format* is :utf-8
See merge request cmucl/cmucl!103 - - - - -
5 changed files:
- src/code/extfmts.lisp - src/general-info/release-21e.md - + tests/.gitignore - tests/issues.lisp - + tests/utf8.txt
Changes:
===================================== src/code/extfmts.lisp ===================================== @@ -22,7 +22,7 @@ describe-external-format))
(defvar *default-external-format* - :iso8859-1 + :utf-8 "The default external format to use if no other external format is specified")
===================================== src/general-info/release-21e.md ===================================== @@ -22,6 +22,7 @@ public domain. * Feature enhancements * Changes * Update to ASDF 3.3.6 + * The default external format is `:utf-8` instead of `:iso8859-1` * ANSI compliance fixes: * Bug fixes: * ~~#97~~ Fixes stepping through the source forms in the debugger. This has been broken for quite some time, but it works now. @@ -56,6 +57,7 @@ public domain. * ~~#132~~ Ansi test `RENAME-FILE.1` no fails * ~~#134~~ Handle the case of `(expt complex complex-rational)` * ~~#136~~ `ensure-directories-exist` should return the given pathspec + * #139 `*default-external-format*` defaults to `:utf-8` * ~~#142~~ `(random 0)` signals incorrect error * Other changes: * Improvements to the PCL implementation of CLOS:
===================================== tests/.gitignore ===================================== @@ -0,0 +1 @@ +/out-utf8.txt
===================================== tests/issues.lisp ===================================== @@ -5,6 +5,12 @@
(in-package "ISSUES-TESTS")
+(defparameter *test-path* + (merge-pathnames (make-pathname :name :unspecific :type :unspecific + :version :unspecific) + *load-truename*) + "Path to where this file is.") + (defun square (x) (expt x 2))
@@ -676,4 +682,66 @@ ;; work and not return NIL. (assert-true (file-author ".")) (assert-true (file-author "bin/build.sh")) - (assert-true (file-author "tests/안녕하십니까.txt"))) + (let ((unix::*filename-encoding* :utf-8)) + ;; Set filename encoding to utf-8 so that we can encode the + ;; filename properly. + (assert-true + (file-author + (merge-pathnames + (concatenate 'string + ;; Write the test file name this way so + ;; that it's independent of the encoding + ;; used to load this file. The name is + ;; "안녕하십니까". + '(#\Hangul_Syllable_An #\Hangul_Syllable_Nyeong #\Hangul_Syllable_Ha + #\Hangul_Syllable_Sib #\Hangul_Syllable_Ni #\Hangul_Syllable_Gga) + ".txt") + *test-path*))))) + +(define-test issue.139-default-external-format + (:tag :issues) + (assert-eq :utf-8 stream:*default-external-format*)) + +(define-test issue.139-default-external-format-read-file + (:tag :issues) + (let ((string (concatenate 'string + ;; This is "hello" in Korean + '(#\Hangul_syllable_an + #\Hangul_Syllable_Nyeong + #\Hangul_Syllable_Ha + #\Hangul_Syllable_Se + #\Hangul_Syllable_Yo)))) + ;; Test that opening a file for reading uses the the default :utf8 + ;; encoding. + (with-open-file (s (merge-pathnames "utf8.txt" + *test-path*) + :direction :input) + ;; The first line should be "hello" in Hangul. + (assert-equal (map 'list #'char-name string) + (map 'list #'char-name (read-line s)))))) + +(define-test issue.139-default-external-format-write-file + (:tag :issues) + ;; Test that opening a file for writing uses the default :utf8. + ;; First write something out to the file. Then read it back in + ;; using an explicit format of utf8 and verifying that we got the + ;; right contents. + (let ((string (concatenate 'string + ;; This is "hello" in Korean + '(#\Hangul_syllable_an + #\Hangul_Syllable_Nyeong + #\Hangul_Syllable_Ha + #\Hangul_Syllable_Se + #\Hangul_Syllable_Yo)))) + (with-open-file (s (merge-pathnames "out-utf8.txt" + *test-path*) + :direction :output + :if-exists :supersede) + (write-line string s)) + (with-open-file (s (merge-pathnames "out-utf8.txt" + *test-path*) + :direction :input + :external-format :utf-8) + (assert-equal (map 'list #'char-name string) + (map 'list #'char-name (read-line s)))))) +
===================================== tests/utf8.txt ===================================== @@ -0,0 +1,2 @@ +안녕하세요 +UTF8 test. The above line is "Hello" in Hangul.
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/0dad5a1a767748e6c4fafba...