Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl
Commits:
-
1652ad4b
by Raymond Toy at 2024-05-13T20:19:02-07:00
-
6633e24c
by Raymond Toy at 2024-05-13T20:21:17-07:00
3 changed files:
Changes:
... | ... | @@ -215,7 +215,8 @@ |
215 | 215 | (let ((m (char-code (the base-char char))))
|
216 | 216 | (or (< 31 m 127)
|
217 | 217 | #+(and unicode (not unicode-bootstrap))
|
218 | - (and (> m +unicode-lower-limit+)
|
|
218 | + (and (/= m 181)
|
|
219 | + (> m +unicode-lower-limit+)
|
|
219 | 220 | (>= (unicode-category m) +unicode-category-graphic+))))))
|
220 | 221 | |
221 | 222 | |
... | ... | @@ -272,6 +273,11 @@ |
272 | 273 | (or (< 64 m 91) (< 96 m 123)
|
273 | 274 | #+(and unicode (not unicode-bootstrap))
|
274 | 275 | (and (> m +unicode-lower-limit+)
|
276 | + ;; Unicode says Micro_sign is a lower case letter, but
|
|
277 | + ;; for CL, we don't want it to be a lower case letter.
|
|
278 | + ;; This is for compatibility with other Lisp
|
|
279 | + ;; implementations.
|
|
280 | + (/= m 181)
|
|
275 | 281 | (<= +unicode-category-upper+
|
276 | 282 | (unicode-category m)
|
277 | 283 | +unicode-category-lower+)))))
|
... | ... | @@ -464,8 +470,12 @@ |
464 | 470 | char)
|
465 | 471 | #+(and unicode (not unicode-bootstrap))
|
466 | 472 | (let ((m (char-code char)))
|
467 | - (cond ((> m +unicode-lower-limit+) (code-char (unicode-upper m)))
|
|
468 | - ((< 96 m 123) (code-char (- m 32)))
|
|
473 | + (cond ((< 96 m 123) (code-char (- m 32)))
|
|
474 | + ((= m 181) char)
|
|
475 | + ((> m +unicode-lower-limit+)
|
|
476 | + (if (member (unicode-category m) '(92 32 75 109))
|
|
477 | + char
|
|
478 | + (code-char (unicode-upper m))))
|
|
469 | 479 | (t char))))
|
470 | 480 | |
471 | 481 | (defun char-titlecase (char)
|
... | ... | @@ -490,7 +500,10 @@ |
490 | 500 | char)
|
491 | 501 | #+(and unicode (not unicode-bootstrap))
|
492 | 502 | (let ((m (char-code char)))
|
493 | - (cond ((> m +unicode-lower-limit+) (code-char (unicode-lower m)))
|
|
503 | + (cond ((> m +unicode-lower-limit+)
|
|
504 | + (if (member (unicode-category m) '(92 75 109))
|
|
505 | + char
|
|
506 | + (code-char (unicode-lower m))))
|
|
494 | 507 | ((< 64 m 91) (code-char (+ m 32)))
|
495 | 508 | (t char))))
|
496 | 509 |
... | ... | @@ -883,9 +883,7 @@ |
883 | 883 | (unless (unidata-scase *unicode-data*) (load-scase))
|
884 | 884 | (let* ((scase (unidata-scase *unicode-data*))
|
885 | 885 | (n (logand (qref32 scase code) #xFF)))
|
886 | - (if (or (zerop n)
|
|
887 | - ;; Ignore category Lt, Mn, Nl, So
|
|
888 | - (member (unicode-category code) '(92 32 75 109)))
|
|
886 | + (if (zerop n)
|
|
889 | 887 | code
|
890 | 888 | (let* ((m (aref (scase-svec scase) (logand n #x7F))))
|
891 | 889 | (if (logbitp 7 n) (+ code m) (- code m))))))
|
... | ... | @@ -896,9 +894,7 @@ |
896 | 894 | (unless (unidata-scase *unicode-data*) (load-scase))
|
897 | 895 | (let* ((scase (unidata-scase *unicode-data*))
|
898 | 896 | (n (logand (ash (qref32 scase code) -8) #xFF)))
|
899 | - (if (or (zerop n)
|
|
900 | - ;; Ignore category Lt, Nl, So
|
|
901 | - (member (unicode-category code) '(92 75 109)))
|
|
897 | + (if (zerop n)
|
|
902 | 898 | code
|
903 | 899 | (let ((m (aref (scase-svec scase) (logand n #x7F))))
|
904 | 900 | (if (logbitp 7 n) (+ code m) (- code m))))))
|
... | ... | @@ -3337,27 +3337,36 @@ |
3337 | 3337 | |
3338 | 3338 | (deftransform char-upcase ((x) (base-char))
|
3339 | 3339 | "open code"
|
3340 | + ;; NOTE: This MUST match what the function char-upcase does.
|
|
3340 | 3341 | #-(and unicode (not unicode-bootstrap))
|
3341 | 3342 | '(if (lower-case-p x)
|
3342 | 3343 | (code-char (- (char-code x) 32))
|
3343 | 3344 | x)
|
3344 | 3345 | #+(and unicode (not unicode-bootstrap))
|
3345 | 3346 | '(let ((m (char-code x)))
|
3346 | - (cond ((> m 127) (code-char (lisp::unicode-upper m)))
|
|
3347 | - ((< 96 m 123) (code-char (- m 32)))
|
|
3347 | + (cond ((< 96 m 123) (code-char (- m 32)))
|
|
3348 | + ((= m 181) x)
|
|
3349 | + ((> m lisp::+unicode-lower-limit+)
|
|
3350 | + (if (member (unicode-category m) '(92 32 75 109))
|
|
3351 | + x
|
|
3352 | + (code-char (lisp::unicode-upper m))))
|
|
3348 | 3353 | (t x))))
|
3349 | 3354 | |
3350 | 3355 | (deftransform char-downcase ((x) (base-char))
|
3351 | 3356 | "open code"
|
3357 | + ;; NOTE: This MUST match what the function char-downcase does.
|
|
3352 | 3358 | #-(and unicode (not unicode-bootstrap))
|
3353 | 3359 | '(if (upper-case-p x)
|
3354 | 3360 | (code-char (+ (char-code x) 32))
|
3355 | 3361 | x)
|
3356 | 3362 | #+(and unicode (not unicode-bootstrap))
|
3357 | 3363 | '(let ((m (char-code x)))
|
3358 | - (cond ((> m 127) (code-char (lisp::unicode-lower m)))
|
|
3359 | - ((< 64 m 91) (code-char (+ m 32)))
|
|
3360 | - (t x))))
|
|
3364 | + (cond ((> m lisp::+unicode-lower-limit+)
|
|
3365 | + (if (member (unicode-category m) '(92 75 109))
|
|
3366 | + x
|
|
3367 | + (code-char (lisp::unicode-lower m))))
|
|
3368 | + ((< 64 m 91) (code-char (+ m 32)))
|
|
3369 | + (t x))))
|
|
3361 | 3370 | |
3362 | 3371 | |
3363 | 3372 | ;;;; Equality predicate transforms:
|