Raymond Toy pushed to branch issue-323-cl-string-casing at cmucl / cmucl

Commits:

4 changed files:

Changes:

  • src/code/string.lisp
    ... ... @@ -706,7 +706,7 @@
    706 706
     		 (setq newword t))
    
    707 707
     		(newword
    
    708 708
     		 ;;char is first case-modifiable after non-case-modifiable
    
    709
    -		 (setq char (char-titlecase char))
    
    709
    +		 (setq char (char-upcase char))
    
    710 710
     		 (setq newword ()))
    
    711 711
     		;;char is case-modifiable, but not first
    
    712 712
     		(t (setq char (char-downcase char))))
    

  • src/code/unicode.lisp
    ... ... @@ -525,6 +525,48 @@
    525 525
     			    :end next)))
    
    526 526
     	  (write-string string result :start end :end offset-slen))))))
    
    527 527
     
    
    528
    +(defun string-capitalize-simple (string &key (start 0) end)
    
    529
    +  _N"Given a string, returns a copy of the string with the first
    
    530
    +  character of each ``word'' converted to upper-case, and remaining
    
    531
    +  chars in the word converted to lower case. A ``word'' is defined
    
    532
    +  to be a string of case-modifiable characters delimited by
    
    533
    +  non-case-modifiable chars."
    
    534
    +  (declare (fixnum start))
    
    535
    +  (let* ((string (if (stringp string) string (string string)))
    
    536
    +	 (slen (length string)))
    
    537
    +    (declare (fixnum slen))
    
    538
    +    (with-one-string string start end offset
    
    539
    +      (let ((offset-slen (+ slen offset))
    
    540
    +	    (newstring (make-string slen)))
    
    541
    +	(declare (fixnum offset-slen))
    
    542
    +	(do ((index offset (1+ index))
    
    543
    +	     (new-index 0 (1+ new-index)))
    
    544
    +	    ((= index start))
    
    545
    +	  (declare (fixnum index new-index))
    
    546
    +	  (setf (schar newstring new-index) (schar string index)))
    
    547
    +	(do ((index start (1+ index))
    
    548
    +	     (new-index (- start offset) (1+ new-index))
    
    549
    +	     (newword t)
    
    550
    +	     (char ()))
    
    551
    +	    ((= index (the fixnum end)))
    
    552
    +	  (declare (fixnum index new-index))
    
    553
    +	  (setq char (schar string index))
    
    554
    +	  (cond ((not (alphanumericp char))
    
    555
    +		 (setq newword t))
    
    556
    +		(newword
    
    557
    +		 ;;char is first case-modifiable after non-case-modifiable
    
    558
    +		 (setq char (char-titlecase char))
    
    559
    +		 (setq newword ()))
    
    560
    +		;;char is case-modifiable, but not first
    
    561
    +		(t (setq char (char-downcase char))))
    
    562
    +	  (setf (schar newstring new-index) char))
    
    563
    +	(do ((index end (1+ index))
    
    564
    +	     (new-index (- (the fixnum end) offset) (1+ new-index)))
    
    565
    +	    ((= index offset-slen))
    
    566
    +	  (declare (fixnum index new-index))
    
    567
    +	  (setf (schar newstring new-index) (schar string index)))
    
    568
    +	newstring))))
    
    569
    +
    
    528 570
     (defun string-capitalize-full (string &key (start 0) end (casing :full))
    
    529 571
       "Capitalize String using the Common Lisp word-break algorithm to find
    
    530 572
       the words in String.  The beginning is capitalized depending on the
    
    ... ... @@ -599,7 +641,7 @@
    599 641
       (if unicode-word-break
    
    600 642
           (string-capitalize-unicode string :start start :end end :casing casing)
    
    601 643
           (if (eq casing :simple)
    
    602
    -	  (cl:string-capitalize string :start start :end end)
    
    644
    +	  (cl:string-capitalize-simple string :start start :end end)
    
    603 645
     	  (string-capitalize-full string :start start :end end :casing casing))))
    
    604 646
     
    
    605 647
     
    

  • src/i18n/locale/cmucl.pot
    ... ... @@ -3892,7 +3892,7 @@ msgid ""
    3892 3892
     "  upper case alphabetic characters converted to lowercase."
    
    3893 3893
     msgstr ""
    
    3894 3894
     
    
    3895
    -#: src/code/string.lisp
    
    3895
    +#: src/code/unicode.lisp src/code/string.lisp
    
    3896 3896
     msgid ""
    
    3897 3897
     "Given a string, returns a copy of the string with the first\n"
    
    3898 3898
     "  character of each ``word'' converted to upper-case, and remaining\n"
    

  • tests/string.lisp
    ... ... @@ -58,3 +58,36 @@
    58 58
                when (char/= actual (char-downcase expected))
    
    59 59
                  collect (list (char-downcase expected)
    
    60 60
                                (char-code actual))))))
    
    61
    +
    
    62
    +(define-test string-capitalize
    
    63
    +    (:tag :issues)
    
    64
    +  (let* ((s (string-capitalize
    
    65
    +             ;; #\Latin_Small_Letter_Dz and #\Latin_Small_Letter_Lj
    
    66
    +             ;; #have Unicode titlecase characters that differ from
    
    67
    +             ;; CHAR-UPCASE.  This tests that STRING-CAPITALIZE use
    
    68
    +             ;; CHAR-UPCASE to produce the capitalization.
    
    69
    +             (coerce
    
    70
    +              '(#\Latin_Small_Letter_Dz
    
    71
    +                #\a #\b
    
    72
    +                #\space
    
    73
    +                #\Latin_Small_Letter_Lj
    
    74
    +                #\A #\B)
    
    75
    +              'string)))
    
    76
    +         (expected
    
    77
    +           ;; Manually convert the test string by calling CHAR-UPCASE
    
    78
    +           ;; or CHAR-DOWNCASE (or IDENTITY) to get the desired
    
    79
    +           ;; capitalized string.
    
    80
    +           (map 'list
    
    81
    +                #'(lambda (c f)
    
    82
    +                    (funcall f c))
    
    83
    +                s
    
    84
    +                (list #'char-upcase
    
    85
    +                      #'char-downcase
    
    86
    +                      #'char-downcase
    
    87
    +                      #'identity
    
    88
    +                      #'char-upcase
    
    89
    +                      #'char-downcase
    
    90
    +                      #'char-downcase))))
    
    91
    +    (assert-equal
    
    92
    +     (map 'list #'char-name expected)
    
    93
    +     (map 'list #'char-name s))))