Raymond Toy pushed to branch issue-326-char-casing-cleanup at cmucl / cmucl Commits: b8887cd6 by Raymond Toy at 2024-06-03T14:38:08-07:00 Fix typo in the lower limit for char-upcase. We use a lower limit of `#\a`, but we really should have used `#\``. - - - - - e4a3d3b5 by Raymond Toy at 2024-06-03T15:59:09-07:00 Fix equal-char-code to use CL lowercase Previously, `equal-char-code` used the Unicode function `unicode-lower` to convert the character to lower case. That's not what we want; we want to use the CL lower case character. Thus, use `case-mapping-lower-case` to get the desired character code. Add a test for `char-equal` that shows we've fixed this. - - - - - c3e7e0b4 by Raymond Toy at 2024-06-03T16:11:47-07:00 Simplify equal-char-code as suggested. Replace the nested `if`'s with a `cond`. Change the bounds to be inclusive to make it easier to reason about the range of characters. - - - - - 7c711f4f by Raymond Toy at 2024-06-03T16:12:48-07:00 Fix a typo and add a comment. - - - - - 3 changed files: - src/code/char.lisp - src/compiler/srctran.lisp - + tests/char.lisp Changes: ===================================== src/code/char.lisp ===================================== @@ -418,14 +418,14 @@ (defmacro equal-char-code (character) `(let ((ch (char-code ,character))) - ;; Handle ASCII separately for bootstrapping and for unidata missing. - (if (< (char-code #\@) ch (char-code #\[)) - (+ ch 32) - #-(and unicode (not unicode-bootstrap)) - ch - #+(and unicode (not unicode-bootstrap)) - (if (> ch +ascii-limit+) (unicode-lower ch) ch)))) - + ;; Handle ASCII separately for bootstrapping. + (cond ((<= (char-code #\A) ch (char-code #\Z)) + (logxor ch #x20)) + #+(and unicode (not unicode-bootstrap)) + ((> ch +ascii-limit+) + (case-mapping-lower-case ch)) + (t + ch)))) (defun char-equal (character &rest more-characters) "Returns T if all of its arguments are the same character. ===================================== src/compiler/srctran.lisp ===================================== @@ -3343,7 +3343,7 @@ x) #+(and unicode (not unicode-bootstrap)) '(let ((m (char-code x))) - (cond ((< (char-code #\a) m (char-code #\{)) + (cond ((< (char-code #\`) m (char-code #\{)) (code-char (logxor m #x20))) ((> m lisp::+ascii-limit+) (code-char (lisp::case-mapping-upper-case m))) ===================================== tests/char.lisp ===================================== @@ -0,0 +1,24 @@ +;; Tests of char functions + +(defpackage :char-tests + (:use :cl :lisp-unit)) + +(in-package "CHAR-TESTS") + +(define-test char-equal + (:tag :issues) + (let ((test-codes + ;; Find all the codes where the CL lower case character + ;; doesn't match the Unicode lower case character. + (loop for code from 128 below char-code-limit + for ch = (code-char code) + when (/= (char-code (char-downcase ch)) (or (lisp::unicode-lower code) code)) + collect code))) + (dolist (code test-codes) + ;; Verify that we convert to the CL lower case character instead + ;; of the Unicode lower case character for the cases where these + ;; are different. + (assert-false (char-equal (code-char (lisp::unicode-lower code)) + (code-char code)) + code + (lisp::unicode-lower code))))) View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/ca36c16321c2028784aaa3b... -- This project does not include diff previews in email notifications. View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/ca36c16321c2028784aaa3b... You're receiving this email because of your account on gitlab.common-lisp.net.