![](https://secure.gravatar.com/avatar/5634a99cd64dd70d4a6692c3031a1284.jpg?s=120&d=mm&r=g)
Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl Commits: 832e1d7e by Raymond Toy at 2024-05-17T07:54:35-07:00 Clean up comments and code Add some better comments and remove some old commented-out code. - - - - - 48806331 by Raymond Toy at 2024-05-17T08:42:41-07:00 Make nstring casing consistent with char casing. Use char-upcase/downcase to compute the desired case for nstring-upcase/downcase. Add tests for this as well. - - - - - 7431f3a8 by Raymond Toy at 2024-05-17T08:43:27-07:00 Update pot file for updated docstring - - - - - 4 changed files: - src/code/char.lisp - src/code/string.lisp - src/i18n/locale/cmucl.pot - tests/string.lisp Changes: ===================================== src/code/char.lisp ===================================== @@ -60,10 +60,12 @@ (deftype codepoint () `(integer 0 (,codepoint-limit))) -;; This MUST be greater than or equal to 127! +;; This MUST be greater than or equal to 127! Be very careful about +;; changing this. It affects all kinds of casing issues. (defconstant +unicode-lower-limit+ 191 - "A character code strictly larger than this is handled using Unicode rules.") + "A character code strictly larger than this is handled using Unicode + rules.") (macrolet ((frob (char-names-list) ===================================== src/code/string.lisp ===================================== @@ -640,12 +640,8 @@ (declare (fixnum index new-index)) (multiple-value-bind (code wide) (codepoint string index) (when wide (incf index)) - ;; Handle ASCII specially because this is called early in - ;; initialization, before unidata is available. - #+nil - (cond ((< 96 code 123) (decf code 32)) - #+unicode - ((> code 127) (setq code (unicode-upper code)))) + ;; Use char-upcase if it's not a surrogate pair so that + ;; we're always consist. (if wide (setq code (unicode-upper code)) (setf code (char-code (char-upcase (code-char code))))) @@ -686,11 +682,8 @@ (declare (fixnum index new-index)) (multiple-value-bind (code wide) (codepoint string index) (when wide (incf index)) - ;; Handle ASCII specially because this is called early in - ;; initialization, before unidata is available. - #+nil - (cond ((< 64 code 91) (incf code 32)) - ((> code 127) (setq code (unicode-lower code)))) + ;; Use char-downcase if it's not a surrogate pair so that + ;; we're always consist. (if wide (setq code (unicode-lower code)) (setq code (char-code (char-downcase (code-char code))))) @@ -761,12 +754,9 @@ ((= index (the fixnum end))) (declare (fixnum index)) (multiple-value-bind (code wide) (codepoint string index) - (declare (ignore wide)) - ;; Handle ASCII specially because this is called early in - ;; initialization, before unidata is available. - (cond ((< 96 code 123) (decf code 32)) - #+unicode - ((> code 127) (setq code (unicode-upper code)))) + (if wide + (setq code (unicode-upper code)) + (setf code (char-code (char-upcase (code-char code))))) ;;@@ WARNING: this may, in theory, need to extend string ;; (which, obviously, we can't do here. Unless ;; STRING is adjustable, maybe) @@ -788,10 +778,9 @@ ((= index (the fixnum end))) (declare (fixnum index)) (multiple-value-bind (code wide) (codepoint string index) - (declare (ignore wide)) - (cond ((< 64 code 91) (incf code 32)) - #+unicode - ((> code 127) (setq code (unicode-lower code)))) + (if wide + (setq code (unicode-lower code)) + (setf code (char-code (char-downcase (code-char code))))) ;;@@ WARNING: this may, in theory, need to extend string ;; (which, obviously, we can't do here. Unless ;; STRING is adjustable, maybe) ===================================== src/i18n/locale/cmucl.pot ===================================== @@ -5445,7 +5445,8 @@ msgstr "" #: src/code/char.lisp msgid "" -"A character code strictly larger than this is handled using Unicode rules." +"A character code strictly larger than this is handled using Unicode\n" +" rules." msgstr "" #: src/code/char.lisp ===================================== tests/string.lisp ===================================== @@ -22,7 +22,7 @@ (loop for expected across s and actual across s-upcase when (char/= actual (char-upcase expected)) - collect (list (char-upcase (char-code expected)) + collect (list (char-upcase expected) (char-code actual)))))) (define-test string-downcase @@ -33,5 +33,29 @@ (loop for expected across s and actual across s-downcase when (char/= actual (char-downcase expected)) - collect (list (char-downcase (char-code expected)) + collect (list (char-downcase expected) + (char-code actual)))))) + +(define-test nstring-upcase + (:tag :issues) + (let* ((s (make-test-string)) + (ns (make-test-string)) + (ns-upcase (nstring-upcase ns))) + (assert-false + (loop for expected across s + and actual across ns + when (char/= actual (char-upcase expected)) + collect (list (char-upcase expected) + (char-code actual)))))) + +(define-test nstring-downcase + (:tag :issues) + (let* ((s (make-test-string)) + (ns (make-test-string)) + (ns-downcase (nstring-downcase ns))) + (assert-false + (loop for expected across s + and actual across ns + when (char/= actual (char-downcase expected)) + collect (list (char-downcase expected) (char-code actual)))))) View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6d80142c9e62bfe0ef6f426... -- This project does not include diff previews in email notifications. View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6d80142c9e62bfe0ef6f426... You're receiving this email because of your account on gitlab.common-lisp.net.