Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
-
535f7e5d
by Raymond Toy at 2024-06-03T23:07:07+00:00
-
57d5a21c
by Raymond Toy at 2024-06-03T23:07:12+00:00
5 changed files:
- src/code/string.lisp
- src/code/unicode.lisp
- src/i18n/locale/cmucl.pot
- tests/string.lisp
- tests/unicode.lisp
Changes:
| ... | ... | @@ -638,27 +638,14 @@ |
| 638 | 638 | (new-index (- start offset) (1+ new-index)))
|
| 639 | 639 | ((= index (the fixnum end)))
|
| 640 | 640 | (declare (fixnum index new-index))
|
| 641 | - (multiple-value-bind (code wide) (codepoint string index)
|
|
| 642 | - (when wide (incf index))
|
|
| 643 | - ;; Use char-upcase if it's not a surrogate pair so that
|
|
| 644 | - ;; we're always consist.
|
|
| 645 | - (if wide
|
|
| 646 | - (setq code (unicode-upper code))
|
|
| 647 | - (setf code (char-code (char-upcase (code-char code)))))
|
|
| 648 | - ;;@@ WARNING: this may, in theory, need to extend newstring
|
|
| 649 | - ;; but that never actually occurs as of Unicode 5.1.0,
|
|
| 650 | - ;; so I'm just going to ignore it for now...
|
|
| 651 | - (multiple-value-bind (hi lo) (surrogates code)
|
|
| 652 | - (setf (schar newstring new-index) hi)
|
|
| 653 | - (when lo
|
|
| 654 | - (setf (schar newstring (incf new-index)) lo)))))
|
|
| 655 | - ;;@@ WARNING: see above
|
|
| 656 | - (do ((index end (1+ index))
|
|
| 641 | + (setf (schar newstring new-index)
|
|
| 642 | + (char-upcase (schar string index))))
|
|
| 643 | + (do ((index end (1+ index))
|
|
| 657 | 644 | (new-index (- (the fixnum end) offset) (1+ new-index)))
|
| 658 | 645 | ((= index offset-slen))
|
| 659 | 646 | (declare (fixnum index new-index))
|
| 660 | 647 | (setf (schar newstring new-index) (schar string index)))
|
| 661 | - newstring))))
|
|
| 648 | + newstring))))
|
|
| 662 | 649 | |
| 663 | 650 | (defun string-downcase (string &key (start 0) end)
|
| 664 | 651 | _N"Given a string, returns a new string that is a copy of it with all
|
| ... | ... | @@ -680,64 +667,8 @@ |
| 680 | 667 | (new-index (- start offset) (1+ new-index)))
|
| 681 | 668 | ((= index (the fixnum end)))
|
| 682 | 669 | (declare (fixnum index new-index))
|
| 683 | - (multiple-value-bind (code wide) (codepoint string index)
|
|
| 684 | - (when wide (incf index))
|
|
| 685 | - ;; Use char-downcase if it's not a surrogate pair so that
|
|
| 686 | - ;; we're always consist.
|
|
| 687 | - (if wide
|
|
| 688 | - (setq code (unicode-lower code))
|
|
| 689 | - (setq code (char-code (char-downcase (code-char code)))))
|
|
| 690 | - ;;@@ WARNING: this may, in theory, need to extend newstring
|
|
| 691 | - ;; but that never actually occurs as of Unicode 5.1.0,
|
|
| 692 | - ;; so I'm just going to ignore it for now...
|
|
| 693 | - (multiple-value-bind (hi lo) (surrogates code)
|
|
| 694 | - (setf (schar newstring new-index) hi)
|
|
| 695 | - (when lo
|
|
| 696 | - (setf (schar newstring (incf new-index)) lo)))))
|
|
| 697 | - ;;@@ WARNING: see above
|
|
| 698 | - (do ((index end (1+ index))
|
|
| 699 | - (new-index (- (the fixnum end) offset) (1+ new-index)))
|
|
| 700 | - ((= index offset-slen))
|
|
| 701 | - (declare (fixnum index new-index))
|
|
| 702 | - (setf (schar newstring new-index) (schar string index)))
|
|
| 703 | - newstring))))
|
|
| 704 | - |
|
| 705 | -#+nil
|
|
| 706 | -(defun string-capitalize (string &key (start 0) end)
|
|
| 707 | - _N"Given a string, returns a copy of the string with the first
|
|
| 708 | - character of each ``word'' converted to upper-case, and remaining
|
|
| 709 | - chars in the word converted to lower case. A ``word'' is defined
|
|
| 710 | - to be a string of case-modifiable characters delimited by
|
|
| 711 | - non-case-modifiable chars."
|
|
| 712 | - (declare (fixnum start))
|
|
| 713 | - (let* ((string (if (stringp string) string (string string)))
|
|
| 714 | - (slen (length string)))
|
|
| 715 | - (declare (fixnum slen))
|
|
| 716 | - (with-one-string string start end offset
|
|
| 717 | - (let ((offset-slen (+ slen offset))
|
|
| 718 | - (newstring (make-string slen)))
|
|
| 719 | - (declare (fixnum offset-slen))
|
|
| 720 | - (do ((index offset (1+ index))
|
|
| 721 | - (new-index 0 (1+ new-index)))
|
|
| 722 | - ((= index start))
|
|
| 723 | - (declare (fixnum index new-index))
|
|
| 724 | - (setf (schar newstring new-index) (schar string index)))
|
|
| 725 | - (do ((index start (1+ index))
|
|
| 726 | - (new-index (- start offset) (1+ new-index))
|
|
| 727 | - (newword t)
|
|
| 728 | - (char ()))
|
|
| 729 | - ((= index (the fixnum end)))
|
|
| 730 | - (declare (fixnum index new-index))
|
|
| 731 | - (setq char (schar string index))
|
|
| 732 | - (cond ((not (alphanumericp char))
|
|
| 733 | - (setq newword t))
|
|
| 734 | - (newword
|
|
| 735 | - ;;char is first case-modifiable after non-case-modifiable
|
|
| 736 | - (setq char (char-titlecase char))
|
|
| 737 | - (setq newword ()))
|
|
| 738 | - ;;char is case-modifiable, but not first
|
|
| 739 | - (t (setq char (char-downcase char))))
|
|
| 740 | - (setf (schar newstring new-index) char))
|
|
| 670 | + (setf (schar newstring new-index)
|
|
| 671 | + (char-downcase (schar string index))))
|
|
| 741 | 672 | (do ((index end (1+ index))
|
| 742 | 673 | (new-index (- (the fixnum end) offset) (1+ new-index)))
|
| 743 | 674 | ((= index offset-slen))
|
| ... | ... | @@ -796,20 +727,9 @@ |
| 796 | 727 | (do ((index start (1+ index)))
|
| 797 | 728 | ((= index (the fixnum end)))
|
| 798 | 729 | (declare (fixnum index))
|
| 799 | - (multiple-value-bind (code wide) (codepoint string index)
|
|
| 800 | - (if wide
|
|
| 801 | - (setq code (unicode-upper code))
|
|
| 802 | - (setf code (char-code (char-upcase (code-char code)))))
|
|
| 803 | - ;;@@ WARNING: this may, in theory, need to extend string
|
|
| 804 | - ;; (which, obviously, we can't do here. Unless
|
|
| 805 | - ;; STRING is adjustable, maybe)
|
|
| 806 | - ;; but that never actually occurs as of Unicode 5.1.0,
|
|
| 807 | - ;; so I'm just going to ignore it for now...
|
|
| 808 | - (multiple-value-bind (hi lo) (surrogates code)
|
|
| 809 | - (setf (schar string index) hi)
|
|
| 810 | - (when lo
|
|
| 811 | - (setf (schar string (incf index)) lo))))))
|
|
| 812 | - save-header))
|
|
| 730 | + (setf (schar string index)
|
|
| 731 | + (char-upcase (schar string index))))
|
|
| 732 | + save-header)))
|
|
| 813 | 733 | |
| 814 | 734 | (defun nstring-downcase (string &key (start 0) end)
|
| 815 | 735 | _N"Given a string, returns that string with all upper case alphabetic
|
| ... | ... | @@ -820,45 +740,8 @@ |
| 820 | 740 | (do ((index start (1+ index)))
|
| 821 | 741 | ((= index (the fixnum end)))
|
| 822 | 742 | (declare (fixnum index))
|
| 823 | - (multiple-value-bind (code wide) (codepoint string index)
|
|
| 824 | - (if wide
|
|
| 825 | - (setq code (unicode-lower code))
|
|
| 826 | - (setf code (char-code (char-downcase (code-char code)))))
|
|
| 827 | - ;;@@ WARNING: this may, in theory, need to extend string
|
|
| 828 | - ;; (which, obviously, we can't do here. Unless
|
|
| 829 | - ;; STRING is adjustable, maybe)
|
|
| 830 | - ;; but that never actually occurs as of Unicode 5.1.0,
|
|
| 831 | - ;; so I'm just going to ignore it for now...
|
|
| 832 | - (multiple-value-bind (hi lo) (surrogates code)
|
|
| 833 | - (setf (schar string index) hi)
|
|
| 834 | - (when lo
|
|
| 835 | - (setf (schar string (incf index)) lo))))))
|
|
| 836 | - save-header))
|
|
| 837 | - |
|
| 838 | -#+nil
|
|
| 839 | -(defun nstring-capitalize (string &key (start 0) end)
|
|
| 840 | - "Given a string, returns that string with the first
|
|
| 841 | - character of each ``word'' converted to upper-case, and remaining
|
|
| 842 | - chars in the word converted to lower case. A ``word'' is defined
|
|
| 843 | - to be a string of case-modifiable characters delimited by
|
|
| 844 | - non-case-modifiable chars."
|
|
| 845 | - (declare (fixnum start))
|
|
| 846 | - (let ((save-header string))
|
|
| 847 | - (with-one-string string start end offset
|
|
| 848 | - (do ((index start (1+ index))
|
|
| 849 | - (newword t)
|
|
| 850 | - (char ()))
|
|
| 851 | - ((= index (the fixnum end)))
|
|
| 852 | - (declare (fixnum index))
|
|
| 853 | - (setq char (schar string index))
|
|
| 854 | - (cond ((not (alphanumericp char))
|
|
| 855 | - (setq newword t))
|
|
| 856 | - (newword
|
|
| 857 | - ;;char is first case-modifiable after non-case-modifiable
|
|
| 858 | - (setf (schar string index) (char-titlecase char))
|
|
| 859 | - (setq newword ()))
|
|
| 860 | - (t
|
|
| 861 | - (setf (schar string index) (char-downcase char))))))
|
|
| 743 | + (setf (schar string index)
|
|
| 744 | + (char-downcase (schar string index)))))
|
|
| 862 | 745 | save-header))
|
| 863 | 746 | |
| 864 | 747 | (defun nstring-capitalize (string &key (start 0) end)
|
| ... | ... | @@ -17,14 +17,57 @@ |
| 17 | 17 | (in-package "UNICODE")
|
| 18 | 18 | (intl:textdomain "cmucl")
|
| 19 | 19 | |
| 20 | +(defun string-upcase-simple (string &key (start 0) end)
|
|
| 21 | + _N"Given a string, returns a new string that is a copy of it with all
|
|
| 22 | + lower case alphabetic characters converted to uppercase."
|
|
| 23 | + (declare (fixnum start))
|
|
| 24 | + (let* ((string (string string))
|
|
| 25 | + (slen (length string)))
|
|
| 26 | + (declare (fixnum slen))
|
|
| 27 | + (lisp::with-one-string string start end offset
|
|
| 28 | + (let ((offset-slen (+ slen offset))
|
|
| 29 | + (newstring (make-string slen)))
|
|
| 30 | + (declare (fixnum offset-slen))
|
|
| 31 | + (do ((index offset (1+ index))
|
|
| 32 | + (new-index 0 (1+ new-index)))
|
|
| 33 | + ((= index start))
|
|
| 34 | + (declare (fixnum index new-index))
|
|
| 35 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 36 | + (do ((index start (1+ index))
|
|
| 37 | + (new-index (- start offset) (1+ new-index)))
|
|
| 38 | + ((= index (the fixnum end)))
|
|
| 39 | + (declare (fixnum index new-index))
|
|
| 40 | + (multiple-value-bind (code wide) (codepoint string index)
|
|
| 41 | + (when wide (incf index))
|
|
| 42 | + ;; Use char-upcase if it's not a surrogate pair so that
|
|
| 43 | + ;; we're always consist.
|
|
| 44 | + (if wide
|
|
| 45 | + (setq code (unicode-upper code))
|
|
| 46 | + (setf code (char-code (char-upcase (code-char code)))))
|
|
| 47 | + ;;@@ WARNING: this may, in theory, need to extend newstring
|
|
| 48 | + ;; but that never actually occurs as of Unicode 5.1.0,
|
|
| 49 | + ;; so I'm just going to ignore it for now...
|
|
| 50 | + (multiple-value-bind (hi lo) (surrogates code)
|
|
| 51 | + (setf (schar newstring new-index) hi)
|
|
| 52 | + (when lo
|
|
| 53 | + (setf (schar newstring (incf new-index)) lo)))))
|
|
| 54 | + ;;@@ WARNING: see above
|
|
| 55 | + (do ((index end (1+ index))
|
|
| 56 | + (new-index (- (the fixnum end) offset) (1+ new-index)))
|
|
| 57 | + ((= index offset-slen))
|
|
| 58 | + (declare (fixnum index new-index))
|
|
| 59 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 60 | + newstring))))
|
|
| 61 | + |
|
| 62 | + |
|
| 20 | 63 | ;; An example where this differs from cl:string-upcase differ:
|
| 21 | 64 | ;; #\Latin_Small_Letter_Sharp_S
|
| 22 | 65 | (defun string-upcase-full (string &key (start 0) end)
|
| 23 | 66 | _N"Given a string, returns a new string that is a copy of it with
|
| 24 | 67 | all lower case alphabetic characters converted to uppercase using
|
| 25 | 68 | full case conversion."
|
| 26 | - (declare (fixnum start)) (let* ((string (if
|
|
| 27 | - (stringp string) string (string string)))
|
|
| 69 | + (declare (fixnum start))
|
|
| 70 | + (let* ((string (string string))
|
|
| 28 | 71 | (slen (length string)))
|
| 29 | 72 | (declare (fixnum slen))
|
| 30 | 73 | (with-output-to-string (s)
|
| ... | ... | @@ -57,11 +100,55 @@ |
| 57 | 100 | all lower case alphabetic characters converted to uppercase. Casing
|
| 58 | 101 | is :simple or :full for simple or full case conversion,
|
| 59 | 102 | respectively."
|
| 60 | - (declare (fixnum start))
|
|
| 61 | - (if (eq casing :simple)
|
|
| 62 | - (cl:string-upcase string :start start :end end)
|
|
| 63 | - (string-upcase-full string :start start :end end)))
|
|
| 103 | + (declare (fixnum start)
|
|
| 104 | + (type (member :simple :full) casing))
|
|
| 105 | + (ecase casing
|
|
| 106 | + (:simple
|
|
| 107 | + (string-upcase-simple string :start start :end end))
|
|
| 108 | + (:full
|
|
| 109 | + (string-upcase-full string :start start :end end))))
|
|
| 64 | 110 | |
| 111 | +(defun string-downcase-simple (string &key (start 0) end)
|
|
| 112 | + _N"Given a string, returns a new string that is a copy of it with all
|
|
| 113 | + upper case alphabetic characters converted to lowercase."
|
|
| 114 | + (declare (fixnum start))
|
|
| 115 | + (let* ((string (if (stringp string) string (string string)))
|
|
| 116 | + (slen (length string)))
|
|
| 117 | + (declare (fixnum slen))
|
|
| 118 | + (lisp::with-one-string string start end offset
|
|
| 119 | + (let ((offset-slen (+ slen offset))
|
|
| 120 | + (newstring (make-string slen)))
|
|
| 121 | + (declare (fixnum offset-slen))
|
|
| 122 | + (do ((index offset (1+ index))
|
|
| 123 | + (new-index 0 (1+ new-index)))
|
|
| 124 | + ((= index start))
|
|
| 125 | + (declare (fixnum index new-index))
|
|
| 126 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 127 | + (do ((index start (1+ index))
|
|
| 128 | + (new-index (- start offset) (1+ new-index)))
|
|
| 129 | + ((= index (the fixnum end)))
|
|
| 130 | + (declare (fixnum index new-index))
|
|
| 131 | + (multiple-value-bind (code wide) (codepoint string index)
|
|
| 132 | + (when wide (incf index))
|
|
| 133 | + ;; Use char-downcase if it's not a surrogate pair so that
|
|
| 134 | + ;; we're always consist.
|
|
| 135 | + (if wide
|
|
| 136 | + (setq code (unicode-lower code))
|
|
| 137 | + (setq code (char-code (char-downcase (code-char code)))))
|
|
| 138 | + ;;@@ WARNING: this may, in theory, need to extend newstring
|
|
| 139 | + ;; but that never actually occurs as of Unicode 5.1.0,
|
|
| 140 | + ;; so I'm just going to ignore it for now...
|
|
| 141 | + (multiple-value-bind (hi lo) (surrogates code)
|
|
| 142 | + (setf (schar newstring new-index) hi)
|
|
| 143 | + (when lo
|
|
| 144 | + (setf (schar newstring (incf new-index)) lo)))))
|
|
| 145 | + ;;@@ WARNING: see above
|
|
| 146 | + (do ((index end (1+ index))
|
|
| 147 | + (new-index (- (the fixnum end) offset) (1+ new-index)))
|
|
| 148 | + ((= index offset-slen))
|
|
| 149 | + (declare (fixnum index new-index))
|
|
| 150 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 151 | + newstring))))
|
|
| 65 | 152 | |
| 66 | 153 | ;; An example this differs from cl:string-downcase:
|
| 67 | 154 | ;; #\Latin_Capital_Letter_I_With_Dot_Above.
|
| ... | ... | @@ -104,10 +191,13 @@ |
| 104 | 191 | uppercase alphabetic characters converted to lowercase. Casing is
|
| 105 | 192 | :simple or :full for simple or full case conversion, respectively."
|
| 106 | 193 | |
| 107 | - (declare (fixnum start))
|
|
| 108 | - (if (eq casing :simple)
|
|
| 109 | - (cl:string-downcase string :start start :end end)
|
|
| 110 | - (string-downcase-full string :start start :end end)))
|
|
| 194 | + (declare (fixnum start)
|
|
| 195 | + (type (member :simple :full) casing))
|
|
| 196 | + (ecase casing
|
|
| 197 | + (:simple
|
|
| 198 | + (string-downcase-simple string :start start :end end))
|
|
| 199 | + (:full
|
|
| 200 | + (string-downcase-full string :start start :end end))))
|
|
| 111 | 201 | |
| 112 | 202 | |
| 113 | 203 | ;;;
|
| ... | ... | @@ -553,12 +643,14 @@ |
| 553 | 643 | delimited by non-case-modifiable chars. "
|
| 554 | 644 | |
| 555 | 645 | (declare (fixnum start)
|
| 556 | - (type (member :simple :full :title) casing))
|
|
| 646 | + (type (member :simple-title :simple :full :title) casing))
|
|
| 557 | 647 | (if unicode-word-break
|
| 558 | 648 | (string-capitalize-unicode string :start start :end end :casing casing)
|
| 559 | - (if (eq casing :simple)
|
|
| 560 | - (string-capitalize-simple string :start start :end end)
|
|
| 561 | - (string-capitalize-full string :start start :end end :casing casing))))
|
|
| 649 | + (ecase casing
|
|
| 650 | + (:simple-title
|
|
| 651 | + (string-capitalize-simple string :start start :end end))
|
|
| 652 | + ((:simple :full :title)
|
|
| 653 | + (string-capitalize-full string :start start :end end :casing casing)))))
|
|
| 562 | 654 | |
| 563 | 655 | |
| 564 | 656 | (defun decompose-hangul-syllable (cp stream)
|
| ... | ... | @@ -3880,13 +3880,13 @@ msgid "" |
| 3880 | 3880 | " a new string Count long filled with the fill character."
|
| 3881 | 3881 | msgstr ""
|
| 3882 | 3882 | |
| 3883 | -#: src/code/string.lisp
|
|
| 3883 | +#: src/code/unicode.lisp src/code/string.lisp
|
|
| 3884 | 3884 | msgid ""
|
| 3885 | 3885 | "Given a string, returns a new string that is a copy of it with all\n"
|
| 3886 | 3886 | " lower case alphabetic characters converted to uppercase."
|
| 3887 | 3887 | msgstr ""
|
| 3888 | 3888 | |
| 3889 | -#: src/code/string.lisp
|
|
| 3889 | +#: src/code/unicode.lisp src/code/string.lisp
|
|
| 3890 | 3890 | msgid ""
|
| 3891 | 3891 | "Given a string, returns a new string that is a copy of it with all\n"
|
| 3892 | 3892 | " upper case alphabetic characters converted to lowercase."
|
| ... | ... | @@ -8,11 +8,10 @@ |
| 8 | 8 | (defun make-test-string ()
|
| 9 | 9 | ;; Create a string consisting of all the code units EXCEPT for the
|
| 10 | 10 | ;; surrogates because string casing handles that differently.
|
| 11 | - (coerce
|
|
| 12 | - (loop for code from 0 to #xffff
|
|
| 13 | - unless (lisp::surrogatep code)
|
|
| 14 | - collect (code-char code))
|
|
| 15 | - 'string))
|
|
| 11 | + (let ((s (make-string char-code-limit)))
|
|
| 12 | + (dotimes (k char-code-limit)
|
|
| 13 | + (setf (aref s k) (code-char k)))
|
|
| 14 | + s))
|
|
| 16 | 15 | |
| 17 | 16 | (define-test string-upcase
|
| 18 | 17 | (:tag :issues)
|
| ... | ... | @@ -62,32 +61,33 @@ |
| 62 | 61 | |
| 63 | 62 | (define-test string-capitalize
|
| 64 | 63 | (:tag :issues)
|
| 65 | - (let* ((s
|
|
| 66 | - ;; This string contains a couple of characters where
|
|
| 67 | - ;; Unicode has a titlecase version of the character. We
|
|
| 68 | - ;; want to make sure we use char-upcase to capitalize the
|
|
| 69 | - ;; string instead of lisp::char-titlecse
|
|
| 70 | - (coerce
|
|
| 71 | - '(#\Latin_Small_Letter_Dz
|
|
| 72 | - #\a #\b
|
|
| 73 | - #\space
|
|
| 74 | - #\Latin_Small_Letter_Lj
|
|
| 75 | - #\A #\B)
|
|
| 76 | - 'string))
|
|
| 64 | + (let* ((s (string-capitalize
|
|
| 65 | + ;; #\Latin_Small_Letter_Dz and #\Latin_Small_Letter_Lj
|
|
| 66 | + ;; #have Unicode titlecase characters that differ from
|
|
| 67 | + ;; CHAR-UPCASE. This tests that STRING-CAPITALIZE use
|
|
| 68 | + ;; CHAR-UPCASE to produce the capitalization.
|
|
| 69 | + (coerce
|
|
| 70 | + '(#\Latin_Small_Letter_Dz
|
|
| 71 | + #\a #\b
|
|
| 72 | + #\space
|
|
| 73 | + #\Latin_Small_Letter_Lj
|
|
| 74 | + #\A #\B)
|
|
| 75 | + 'string)))
|
|
| 77 | 76 | (expected
|
| 78 | - ;; Manually convert S to a capitalized string using
|
|
| 79 | - ;; char-upcase/downcase.
|
|
| 80 | - (map 'string
|
|
| 81 | - #'(lambda (ch f)
|
|
| 82 | - (funcall f ch))
|
|
| 77 | + ;; Manually convert the test string by calling CHAR-UPCASE
|
|
| 78 | + ;; or CHAR-DOWNCASE (or IDENTITY) to get the desired
|
|
| 79 | + ;; capitalized string.
|
|
| 80 | + (map 'list
|
|
| 81 | + #'(lambda (c f)
|
|
| 82 | + (funcall f c))
|
|
| 83 | 83 | s
|
| 84 | 84 | (list #'char-upcase
|
| 85 | 85 | #'char-downcase
|
| 86 | 86 | #'char-downcase
|
| 87 | - #'char-downcase
|
|
| 87 | + #'identity
|
|
| 88 | 88 | #'char-upcase
|
| 89 | 89 | #'char-downcase
|
| 90 | 90 | #'char-downcase))))
|
| 91 | 91 | (assert-equal
|
| 92 | 92 | (map 'list #'char-name expected)
|
| 93 | - (map 'list #'char-name (string-capitalize s))))) |
|
| 93 | + (map 'list #'char-name s)))) |
| ... | ... | @@ -192,3 +192,19 @@ |
| 192 | 192 | computed-breaks)))
|
| 193 | 193 | (assert-equalp b
|
| 194 | 194 | (do-test s)))))))))
|
| 195 | + |
|
| 196 | +(define-test unicode.case-extend
|
|
| 197 | + "Test that Unicode never produces an upper or lower case character
|
|
| 198 | + outside the BMP for a character in the BMP"
|
|
| 199 | + (:tag :unicode)
|
|
| 200 | + ;; For each character code (that isn't a surrogate), find the
|
|
| 201 | + ;; corresponding Unicode upper and lowe case character. Verify that
|
|
| 202 | + ;; this character is in the BMP.
|
|
| 203 | + (loop for code from 0 below char-code-limit
|
|
| 204 | + unless (lisp::surrogatep code)
|
|
| 205 | + do
|
|
| 206 | + (assert-true (< (lisp::unicode-upper code) char-code-limit)
|
|
| 207 | + code (lisp::unicode-upper code))
|
|
| 208 | + (assert-true (< (lisp::unicode-lower code) char-code-limit)
|
|
| 209 | + code (lisp::unicode-upper code))))
|
|
| 210 | + |