Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl
Commits: 832e1d7e by Raymond Toy at 2024-05-17T07:54:35-07:00 Clean up comments and code
Add some better comments and remove some old commented-out code.
- - - - - 48806331 by Raymond Toy at 2024-05-17T08:42:41-07:00 Make nstring casing consistent with char casing.
Use char-upcase/downcase to compute the desired case for nstring-upcase/downcase. Add tests for this as well.
- - - - - 7431f3a8 by Raymond Toy at 2024-05-17T08:43:27-07:00 Update pot file for updated docstring
- - - - -
4 changed files:
- src/code/char.lisp - src/code/string.lisp - src/i18n/locale/cmucl.pot - tests/string.lisp
Changes:
===================================== src/code/char.lisp ===================================== @@ -60,10 +60,12 @@ (deftype codepoint () `(integer 0 (,codepoint-limit)))
-;; This MUST be greater than or equal to 127! +;; This MUST be greater than or equal to 127! Be very careful about +;; changing this. It affects all kinds of casing issues. (defconstant +unicode-lower-limit+ 191 - "A character code strictly larger than this is handled using Unicode rules.") + "A character code strictly larger than this is handled using Unicode + rules.")
(macrolet ((frob (char-names-list)
===================================== src/code/string.lisp ===================================== @@ -640,12 +640,8 @@ (declare (fixnum index new-index)) (multiple-value-bind (code wide) (codepoint string index) (when wide (incf index)) - ;; Handle ASCII specially because this is called early in - ;; initialization, before unidata is available. - #+nil - (cond ((< 96 code 123) (decf code 32)) - #+unicode - ((> code 127) (setq code (unicode-upper code)))) + ;; Use char-upcase if it's not a surrogate pair so that + ;; we're always consist. (if wide (setq code (unicode-upper code)) (setf code (char-code (char-upcase (code-char code))))) @@ -686,11 +682,8 @@ (declare (fixnum index new-index)) (multiple-value-bind (code wide) (codepoint string index) (when wide (incf index)) - ;; Handle ASCII specially because this is called early in - ;; initialization, before unidata is available. - #+nil - (cond ((< 64 code 91) (incf code 32)) - ((> code 127) (setq code (unicode-lower code)))) + ;; Use char-downcase if it's not a surrogate pair so that + ;; we're always consist. (if wide (setq code (unicode-lower code)) (setq code (char-code (char-downcase (code-char code))))) @@ -761,12 +754,9 @@ ((= index (the fixnum end))) (declare (fixnum index)) (multiple-value-bind (code wide) (codepoint string index) - (declare (ignore wide)) - ;; Handle ASCII specially because this is called early in - ;; initialization, before unidata is available. - (cond ((< 96 code 123) (decf code 32)) - #+unicode - ((> code 127) (setq code (unicode-upper code)))) + (if wide + (setq code (unicode-upper code)) + (setf code (char-code (char-upcase (code-char code))))) ;;@@ WARNING: this may, in theory, need to extend string ;; (which, obviously, we can't do here. Unless ;; STRING is adjustable, maybe) @@ -788,10 +778,9 @@ ((= index (the fixnum end))) (declare (fixnum index)) (multiple-value-bind (code wide) (codepoint string index) - (declare (ignore wide)) - (cond ((< 64 code 91) (incf code 32)) - #+unicode - ((> code 127) (setq code (unicode-lower code)))) + (if wide + (setq code (unicode-lower code)) + (setf code (char-code (char-downcase (code-char code))))) ;;@@ WARNING: this may, in theory, need to extend string ;; (which, obviously, we can't do here. Unless ;; STRING is adjustable, maybe)
===================================== src/i18n/locale/cmucl.pot ===================================== @@ -5445,7 +5445,8 @@ msgstr ""
#: src/code/char.lisp msgid "" -"A character code strictly larger than this is handled using Unicode rules." +"A character code strictly larger than this is handled using Unicode\n" +" rules." msgstr ""
#: src/code/char.lisp
===================================== tests/string.lisp ===================================== @@ -22,7 +22,7 @@ (loop for expected across s and actual across s-upcase when (char/= actual (char-upcase expected)) - collect (list (char-upcase (char-code expected)) + collect (list (char-upcase expected) (char-code actual))))))
(define-test string-downcase @@ -33,5 +33,29 @@ (loop for expected across s and actual across s-downcase when (char/= actual (char-downcase expected)) - collect (list (char-downcase (char-code expected)) + collect (list (char-downcase expected) + (char-code actual)))))) + +(define-test nstring-upcase + (:tag :issues) + (let* ((s (make-test-string)) + (ns (make-test-string)) + (ns-upcase (nstring-upcase ns))) + (assert-false + (loop for expected across s + and actual across ns + when (char/= actual (char-upcase expected)) + collect (list (char-upcase expected) + (char-code actual)))))) + +(define-test nstring-downcase + (:tag :issues) + (let* ((s (make-test-string)) + (ns (make-test-string)) + (ns-downcase (nstring-downcase ns))) + (assert-false + (loop for expected across s + and actual across ns + when (char/= actual (char-downcase expected)) + collect (list (char-downcase expected) (char-code actual))))))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6d80142c9e62bfe0ef6f426...