Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl
Commits:
-
832e1d7e
by Raymond Toy at 2024-05-17T07:54:35-07:00
-
48806331
by Raymond Toy at 2024-05-17T08:42:41-07:00
-
7431f3a8
by Raymond Toy at 2024-05-17T08:43:27-07:00
4 changed files:
Changes:
| ... | ... | @@ -60,10 +60,12 @@ |
| 60 | 60 | (deftype codepoint ()
|
| 61 | 61 | `(integer 0 (,codepoint-limit)))
|
| 62 | 62 | |
| 63 | -;; This MUST be greater than or equal to 127!
|
|
| 63 | +;; This MUST be greater than or equal to 127! Be very careful about
|
|
| 64 | +;; changing this. It affects all kinds of casing issues.
|
|
| 64 | 65 | (defconstant +unicode-lower-limit+
|
| 65 | 66 | 191
|
| 66 | - "A character code strictly larger than this is handled using Unicode rules.")
|
|
| 67 | + "A character code strictly larger than this is handled using Unicode
|
|
| 68 | + rules.")
|
|
| 67 | 69 | |
| 68 | 70 | |
| 69 | 71 | (macrolet ((frob (char-names-list)
|
| ... | ... | @@ -640,12 +640,8 @@ |
| 640 | 640 | (declare (fixnum index new-index))
|
| 641 | 641 | (multiple-value-bind (code wide) (codepoint string index)
|
| 642 | 642 | (when wide (incf index))
|
| 643 | - ;; Handle ASCII specially because this is called early in
|
|
| 644 | - ;; initialization, before unidata is available.
|
|
| 645 | - #+nil
|
|
| 646 | - (cond ((< 96 code 123) (decf code 32))
|
|
| 647 | - #+unicode
|
|
| 648 | - ((> code 127) (setq code (unicode-upper code))))
|
|
| 643 | + ;; Use char-upcase if it's not a surrogate pair so that
|
|
| 644 | + ;; we're always consist.
|
|
| 649 | 645 | (if wide
|
| 650 | 646 | (setq code (unicode-upper code))
|
| 651 | 647 | (setf code (char-code (char-upcase (code-char code)))))
|
| ... | ... | @@ -686,11 +682,8 @@ |
| 686 | 682 | (declare (fixnum index new-index))
|
| 687 | 683 | (multiple-value-bind (code wide) (codepoint string index)
|
| 688 | 684 | (when wide (incf index))
|
| 689 | - ;; Handle ASCII specially because this is called early in
|
|
| 690 | - ;; initialization, before unidata is available.
|
|
| 691 | - #+nil
|
|
| 692 | - (cond ((< 64 code 91) (incf code 32))
|
|
| 693 | - ((> code 127) (setq code (unicode-lower code))))
|
|
| 685 | + ;; Use char-downcase if it's not a surrogate pair so that
|
|
| 686 | + ;; we're always consist.
|
|
| 694 | 687 | (if wide
|
| 695 | 688 | (setq code (unicode-lower code))
|
| 696 | 689 | (setq code (char-code (char-downcase (code-char code)))))
|
| ... | ... | @@ -761,12 +754,9 @@ |
| 761 | 754 | ((= index (the fixnum end)))
|
| 762 | 755 | (declare (fixnum index))
|
| 763 | 756 | (multiple-value-bind (code wide) (codepoint string index)
|
| 764 | - (declare (ignore wide))
|
|
| 765 | - ;; Handle ASCII specially because this is called early in
|
|
| 766 | - ;; initialization, before unidata is available.
|
|
| 767 | - (cond ((< 96 code 123) (decf code 32))
|
|
| 768 | - #+unicode
|
|
| 769 | - ((> code 127) (setq code (unicode-upper code))))
|
|
| 757 | + (if wide
|
|
| 758 | + (setq code (unicode-upper code))
|
|
| 759 | + (setf code (char-code (char-upcase (code-char code)))))
|
|
| 770 | 760 | ;;@@ WARNING: this may, in theory, need to extend string
|
| 771 | 761 | ;; (which, obviously, we can't do here. Unless
|
| 772 | 762 | ;; STRING is adjustable, maybe)
|
| ... | ... | @@ -788,10 +778,9 @@ |
| 788 | 778 | ((= index (the fixnum end)))
|
| 789 | 779 | (declare (fixnum index))
|
| 790 | 780 | (multiple-value-bind (code wide) (codepoint string index)
|
| 791 | - (declare (ignore wide))
|
|
| 792 | - (cond ((< 64 code 91) (incf code 32))
|
|
| 793 | - #+unicode
|
|
| 794 | - ((> code 127) (setq code (unicode-lower code))))
|
|
| 781 | + (if wide
|
|
| 782 | + (setq code (unicode-lower code))
|
|
| 783 | + (setf code (char-code (char-downcase (code-char code)))))
|
|
| 795 | 784 | ;;@@ WARNING: this may, in theory, need to extend string
|
| 796 | 785 | ;; (which, obviously, we can't do here. Unless
|
| 797 | 786 | ;; STRING is adjustable, maybe)
|
| ... | ... | @@ -5445,7 +5445,8 @@ msgstr "" |
| 5445 | 5445 | |
| 5446 | 5446 | #: src/code/char.lisp
|
| 5447 | 5447 | msgid ""
|
| 5448 | -"A character code strictly larger than this is handled using Unicode rules."
|
|
| 5448 | +"A character code strictly larger than this is handled using Unicode\n"
|
|
| 5449 | +" rules."
|
|
| 5449 | 5450 | msgstr ""
|
| 5450 | 5451 | |
| 5451 | 5452 | #: src/code/char.lisp
|
| ... | ... | @@ -22,7 +22,7 @@ |
| 22 | 22 | (loop for expected across s
|
| 23 | 23 | and actual across s-upcase
|
| 24 | 24 | when (char/= actual (char-upcase expected))
|
| 25 | - collect (list (char-upcase (char-code expected))
|
|
| 25 | + collect (list (char-upcase expected)
|
|
| 26 | 26 | (char-code actual))))))
|
| 27 | 27 | |
| 28 | 28 | (define-test string-downcase
|
| ... | ... | @@ -33,5 +33,29 @@ |
| 33 | 33 | (loop for expected across s
|
| 34 | 34 | and actual across s-downcase
|
| 35 | 35 | when (char/= actual (char-downcase expected))
|
| 36 | - collect (list (char-downcase (char-code expected))
|
|
| 36 | + collect (list (char-downcase expected)
|
|
| 37 | + (char-code actual))))))
|
|
| 38 | + |
|
| 39 | +(define-test nstring-upcase
|
|
| 40 | + (:tag :issues)
|
|
| 41 | + (let* ((s (make-test-string))
|
|
| 42 | + (ns (make-test-string))
|
|
| 43 | + (ns-upcase (nstring-upcase ns)))
|
|
| 44 | + (assert-false
|
|
| 45 | + (loop for expected across s
|
|
| 46 | + and actual across ns
|
|
| 47 | + when (char/= actual (char-upcase expected))
|
|
| 48 | + collect (list (char-upcase expected)
|
|
| 49 | + (char-code actual))))))
|
|
| 50 | + |
|
| 51 | +(define-test nstring-downcase
|
|
| 52 | + (:tag :issues)
|
|
| 53 | + (let* ((s (make-test-string))
|
|
| 54 | + (ns (make-test-string))
|
|
| 55 | + (ns-downcase (nstring-downcase ns)))
|
|
| 56 | + (assert-false
|
|
| 57 | + (loop for expected across s
|
|
| 58 | + and actual across ns
|
|
| 59 | + when (char/= actual (char-downcase expected))
|
|
| 60 | + collect (list (char-downcase expected)
|
|
| 37 | 61 | (char-code actual)))))) |