Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl

Commits:

4 changed files:

Changes:

  • src/code/char.lisp
    ... ... @@ -60,10 +60,12 @@
    60 60
     (deftype codepoint ()
    
    61 61
       `(integer 0 (,codepoint-limit)))
    
    62 62
     
    
    63
    -;; This MUST be greater than or equal to 127!
    
    63
    +;; This MUST be greater than or equal to 127!  Be very careful about
    
    64
    +;; changing this.  It affects all kinds of casing issues.
    
    64 65
     (defconstant +unicode-lower-limit+
    
    65 66
       191
    
    66
    -  "A character code strictly larger than this is handled using Unicode rules.")
    
    67
    +  "A character code strictly larger than this is handled using Unicode
    
    68
    +  rules.")
    
    67 69
     
    
    68 70
     
    
    69 71
     (macrolet ((frob (char-names-list)
    

  • src/code/string.lisp
    ... ... @@ -640,12 +640,8 @@
    640 640
     	  (declare (fixnum index new-index))
    
    641 641
     	  (multiple-value-bind (code wide) (codepoint string index)
    
    642 642
     	    (when wide (incf index))
    
    643
    -	    ;; Handle ASCII specially because this is called early in
    
    644
    -	    ;; initialization, before unidata is available.
    
    645
    -            #+nil
    
    646
    -	    (cond ((< 96 code 123) (decf code 32))
    
    647
    -		  #+unicode
    
    648
    -		  ((> code 127) (setq code (unicode-upper code))))
    
    643
    +            ;; Use char-upcase if it's not a surrogate pair so that
    
    644
    +            ;; we're always consist.
    
    649 645
                 (if wide
    
    650 646
                     (setq code (unicode-upper code))
    
    651 647
                     (setf code (char-code (char-upcase (code-char code)))))
    
    ... ... @@ -686,11 +682,8 @@
    686 682
     	  (declare (fixnum index new-index))
    
    687 683
     	  (multiple-value-bind (code wide) (codepoint string index)
    
    688 684
     	    (when wide (incf index))
    
    689
    -	    ;; Handle ASCII specially because this is called early in
    
    690
    -	    ;; initialization, before unidata is available.
    
    691
    -            #+nil
    
    692
    -	    (cond ((< 64 code 91) (incf code 32))
    
    693
    -		  ((> code 127) (setq code (unicode-lower code))))
    
    685
    +            ;; Use char-downcase if it's not a surrogate pair so that
    
    686
    +            ;; we're always consist.
    
    694 687
                 (if wide
    
    695 688
                     (setq code (unicode-lower code))
    
    696 689
                     (setq code (char-code (char-downcase (code-char code)))))
    
    ... ... @@ -761,12 +754,9 @@
    761 754
     	  ((= index (the fixnum end)))
    
    762 755
     	(declare (fixnum index))
    
    763 756
     	(multiple-value-bind (code wide) (codepoint string index)
    
    764
    -	  (declare (ignore wide))
    
    765
    -	  ;; Handle ASCII specially because this is called early in
    
    766
    -	  ;; initialization, before unidata is available.
    
    767
    -	  (cond ((< 96 code 123) (decf code 32))
    
    768
    -		#+unicode
    
    769
    -		((> code 127) (setq code (unicode-upper code))))
    
    757
    +          (if wide
    
    758
    +              (setq code (unicode-upper code))
    
    759
    +              (setf code (char-code (char-upcase (code-char code)))))
    
    770 760
     	  ;;@@ WARNING: this may, in theory, need to extend string
    
    771 761
     	  ;;      (which, obviously, we can't do here.  Unless
    
    772 762
     	  ;;       STRING is adjustable, maybe)
    
    ... ... @@ -788,10 +778,9 @@
    788 778
     	  ((= index (the fixnum end)))
    
    789 779
     	(declare (fixnum index))
    
    790 780
     	(multiple-value-bind (code wide) (codepoint string index)
    
    791
    -	  (declare (ignore wide))
    
    792
    -	  (cond ((< 64 code 91) (incf code 32))
    
    793
    -		#+unicode
    
    794
    -		((> code 127) (setq code (unicode-lower code))))
    
    781
    +          (if wide
    
    782
    +              (setq code (unicode-lower code))
    
    783
    +              (setf code (char-code (char-downcase (code-char code)))))
    
    795 784
     	  ;;@@ WARNING: this may, in theory, need to extend string
    
    796 785
     	  ;;      (which, obviously, we can't do here.  Unless
    
    797 786
     	  ;;       STRING is adjustable, maybe)
    

  • src/i18n/locale/cmucl.pot
    ... ... @@ -5445,7 +5445,8 @@ msgstr ""
    5445 5445
     
    
    5446 5446
     #: src/code/char.lisp
    
    5447 5447
     msgid ""
    
    5448
    -"A character code strictly larger than this is handled using Unicode rules."
    
    5448
    +"A character code strictly larger than this is handled using Unicode\n"
    
    5449
    +"  rules."
    
    5449 5450
     msgstr ""
    
    5450 5451
     
    
    5451 5452
     #: src/code/char.lisp
    

  • tests/string.lisp
    ... ... @@ -22,7 +22,7 @@
    22 22
          (loop for expected across s
    
    23 23
                and actual across s-upcase
    
    24 24
                when (char/= actual (char-upcase expected))
    
    25
    -             collect (list (char-upcase (char-code expected))
    
    25
    +             collect (list (char-upcase expected)
    
    26 26
                                (char-code actual))))))
    
    27 27
     
    
    28 28
     (define-test string-downcase
    
    ... ... @@ -33,5 +33,29 @@
    33 33
          (loop for expected across s
    
    34 34
                and actual across s-downcase
    
    35 35
                when (char/= actual (char-downcase expected))
    
    36
    -             collect (list (char-downcase (char-code expected))
    
    36
    +             collect (list (char-downcase expected)
    
    37
    +                           (char-code actual))))))
    
    38
    +
    
    39
    +(define-test nstring-upcase
    
    40
    +    (:tag :issues)
    
    41
    +  (let* ((s (make-test-string))
    
    42
    +         (ns (make-test-string))
    
    43
    +         (ns-upcase (nstring-upcase ns)))
    
    44
    +    (assert-false
    
    45
    +     (loop for expected across s
    
    46
    +           and actual across ns
    
    47
    +           when (char/= actual (char-upcase expected))
    
    48
    +             collect (list (char-upcase expected)
    
    49
    +                           (char-code actual))))))
    
    50
    +
    
    51
    +(define-test nstring-downcase
    
    52
    +    (:tag :issues)
    
    53
    +  (let* ((s (make-test-string))
    
    54
    +         (ns (make-test-string))
    
    55
    +         (ns-downcase (nstring-downcase ns)))
    
    56
    +    (assert-false
    
    57
    +     (loop for expected across s
    
    58
    +           and actual across ns
    
    59
    +           when (char/= actual (char-downcase expected))
    
    60
    +             collect (list (char-downcase expected)
    
    37 61
                                (char-code actual))))))