![](https://secure.gravatar.com/avatar/5634a99cd64dd70d4a6692c3031a1284.jpg?s=120&d=mm&r=g)
Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl Commits: 1652ad4b by Raymond Toy at 2024-05-13T20:19:02-07:00 Implement the category cases in the CL functions, not Unicode Previously, we had unicode-upper/lower functions had the code to exclude the characters from the categories we didn't want. However, this should really be done in char-upcase/downcase because that's what we want from CL, not Unicode. - - - - - 6633e24c by Raymond Toy at 2024-05-13T20:21:17-07:00 Update the deftransforms for char-upcase/downcase to match The deftransforms for char-upcase/downcase better match the code for the functions char-upcase/downcase, otherwise, we get totally confusing results. I just copied the code from the functions into the deftransforms. We really need a better way to do this to guarantee this automatically! - - - - - 3 changed files: - src/code/char.lisp - src/code/unidata.lisp - src/compiler/srctran.lisp Changes: ===================================== src/code/char.lisp ===================================== @@ -215,7 +215,8 @@ (let ((m (char-code (the base-char char)))) (or (< 31 m 127) #+(and unicode (not unicode-bootstrap)) - (and (> m +unicode-lower-limit+) + (and (/= m 181) + (> m +unicode-lower-limit+) (>= (unicode-category m) +unicode-category-graphic+)))))) @@ -272,6 +273,11 @@ (or (< 64 m 91) (< 96 m 123) #+(and unicode (not unicode-bootstrap)) (and (> m +unicode-lower-limit+) + ;; Unicode says Micro_sign is a lower case letter, but + ;; for CL, we don't want it to be a lower case letter. + ;; This is for compatibility with other Lisp + ;; implementations. + (/= m 181) (<= +unicode-category-upper+ (unicode-category m) +unicode-category-lower+))))) @@ -464,8 +470,12 @@ char) #+(and unicode (not unicode-bootstrap)) (let ((m (char-code char))) - (cond ((> m +unicode-lower-limit+) (code-char (unicode-upper m))) - ((< 96 m 123) (code-char (- m 32))) + (cond ((< 96 m 123) (code-char (- m 32))) + ((= m 181) char) + ((> m +unicode-lower-limit+) + (if (member (unicode-category m) '(92 32 75 109)) + char + (code-char (unicode-upper m)))) (t char)))) (defun char-titlecase (char) @@ -490,7 +500,10 @@ char) #+(and unicode (not unicode-bootstrap)) (let ((m (char-code char))) - (cond ((> m +unicode-lower-limit+) (code-char (unicode-lower m))) + (cond ((> m +unicode-lower-limit+) + (if (member (unicode-category m) '(92 75 109)) + char + (code-char (unicode-lower m)))) ((< 64 m 91) (code-char (+ m 32))) (t char)))) ===================================== src/code/unidata.lisp ===================================== @@ -883,9 +883,7 @@ (unless (unidata-scase *unicode-data*) (load-scase)) (let* ((scase (unidata-scase *unicode-data*)) (n (logand (qref32 scase code) #xFF))) - (if (or (zerop n) - ;; Ignore category Lt, Mn, Nl, So - (member (unicode-category code) '(92 32 75 109))) + (if (zerop n) code (let* ((m (aref (scase-svec scase) (logand n #x7F)))) (if (logbitp 7 n) (+ code m) (- code m)))))) @@ -896,9 +894,7 @@ (unless (unidata-scase *unicode-data*) (load-scase)) (let* ((scase (unidata-scase *unicode-data*)) (n (logand (ash (qref32 scase code) -8) #xFF))) - (if (or (zerop n) - ;; Ignore category Lt, Nl, So - (member (unicode-category code) '(92 75 109))) + (if (zerop n) code (let ((m (aref (scase-svec scase) (logand n #x7F)))) (if (logbitp 7 n) (+ code m) (- code m)))))) ===================================== src/compiler/srctran.lisp ===================================== @@ -3337,27 +3337,36 @@ (deftransform char-upcase ((x) (base-char)) "open code" + ;; NOTE: This MUST match what the function char-upcase does. #-(and unicode (not unicode-bootstrap)) '(if (lower-case-p x) (code-char (- (char-code x) 32)) x) #+(and unicode (not unicode-bootstrap)) '(let ((m (char-code x))) - (cond ((> m 127) (code-char (lisp::unicode-upper m))) - ((< 96 m 123) (code-char (- m 32))) + (cond ((< 96 m 123) (code-char (- m 32))) + ((= m 181) x) + ((> m lisp::+unicode-lower-limit+) + (if (member (unicode-category m) '(92 32 75 109)) + x + (code-char (lisp::unicode-upper m)))) (t x)))) (deftransform char-downcase ((x) (base-char)) "open code" + ;; NOTE: This MUST match what the function char-downcase does. #-(and unicode (not unicode-bootstrap)) '(if (upper-case-p x) (code-char (+ (char-code x) 32)) x) #+(and unicode (not unicode-bootstrap)) '(let ((m (char-code x))) - (cond ((> m 127) (code-char (lisp::unicode-lower m))) - ((< 64 m 91) (code-char (+ m 32))) - (t x)))) + (cond ((> m lisp::+unicode-lower-limit+) + (if (member (unicode-category m) '(92 75 109)) + x + (code-char (lisp::unicode-lower m)))) + ((< 64 m 91) (code-char (+ m 32))) + (t x)))) ;;;; Equality predicate transforms: View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/7f4f1e7590ba3a2326a91d6... -- This project does not include diff previews in email notifications. View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/7f4f1e7590ba3a2326a91d6... You're receiving this email because of your account on gitlab.common-lisp.net.