Raymond Toy pushed to branch issue-323-cl-string-casing at cmucl / cmucl
Commits:
- 
3557fc31
by Raymond Toy at 2024-05-31T16:20:11-07:00
 - 
163cafa4
by Raymond Toy at 2024-05-31T16:23:13-07:00
 
4 changed files:
Changes:
| ... | ... | @@ -706,7 +706,7 @@ | 
| 706 | 706 |  		 (setq newword t))
 | 
| 707 | 707 |  		(newword
 | 
| 708 | 708 |  		 ;;char is first case-modifiable after non-case-modifiable
 | 
| 709 | -		 (setq char (char-titlecase char))
 | 
|
| 709 | +		 (setq char (char-upcase char))
 | 
|
| 710 | 710 |  		 (setq newword ()))
 | 
| 711 | 711 |  		;;char is case-modifiable, but not first
 | 
| 712 | 712 |  		(t (setq char (char-downcase char))))
 | 
| ... | ... | @@ -525,6 +525,48 @@ | 
| 525 | 525 |  			    :end next)))
 | 
| 526 | 526 |  	  (write-string string result :start end :end offset-slen))))))
 | 
| 527 | 527 | |
| 528 | +(defun string-capitalize-simple (string &key (start 0) end)
 | 
|
| 529 | +  _N"Given a string, returns a copy of the string with the first
 | 
|
| 530 | +  character of each ``word'' converted to upper-case, and remaining
 | 
|
| 531 | +  chars in the word converted to lower case. A ``word'' is defined
 | 
|
| 532 | +  to be a string of case-modifiable characters delimited by
 | 
|
| 533 | +  non-case-modifiable chars."
 | 
|
| 534 | +  (declare (fixnum start))
 | 
|
| 535 | +  (let* ((string (if (stringp string) string (string string)))
 | 
|
| 536 | +	 (slen (length string)))
 | 
|
| 537 | +    (declare (fixnum slen))
 | 
|
| 538 | +    (with-one-string string start end offset
 | 
|
| 539 | +      (let ((offset-slen (+ slen offset))
 | 
|
| 540 | +	    (newstring (make-string slen)))
 | 
|
| 541 | +	(declare (fixnum offset-slen))
 | 
|
| 542 | +	(do ((index offset (1+ index))
 | 
|
| 543 | +	     (new-index 0 (1+ new-index)))
 | 
|
| 544 | +	    ((= index start))
 | 
|
| 545 | +	  (declare (fixnum index new-index))
 | 
|
| 546 | +	  (setf (schar newstring new-index) (schar string index)))
 | 
|
| 547 | +	(do ((index start (1+ index))
 | 
|
| 548 | +	     (new-index (- start offset) (1+ new-index))
 | 
|
| 549 | +	     (newword t)
 | 
|
| 550 | +	     (char ()))
 | 
|
| 551 | +	    ((= index (the fixnum end)))
 | 
|
| 552 | +	  (declare (fixnum index new-index))
 | 
|
| 553 | +	  (setq char (schar string index))
 | 
|
| 554 | +	  (cond ((not (alphanumericp char))
 | 
|
| 555 | +		 (setq newword t))
 | 
|
| 556 | +		(newword
 | 
|
| 557 | +		 ;;char is first case-modifiable after non-case-modifiable
 | 
|
| 558 | +		 (setq char (char-titlecase char))
 | 
|
| 559 | +		 (setq newword ()))
 | 
|
| 560 | +		;;char is case-modifiable, but not first
 | 
|
| 561 | +		(t (setq char (char-downcase char))))
 | 
|
| 562 | +	  (setf (schar newstring new-index) char))
 | 
|
| 563 | +	(do ((index end (1+ index))
 | 
|
| 564 | +	     (new-index (- (the fixnum end) offset) (1+ new-index)))
 | 
|
| 565 | +	    ((= index offset-slen))
 | 
|
| 566 | +	  (declare (fixnum index new-index))
 | 
|
| 567 | +	  (setf (schar newstring new-index) (schar string index)))
 | 
|
| 568 | +	newstring))))
 | 
|
| 569 | +  | 
|
| 528 | 570 |  (defun string-capitalize-full (string &key (start 0) end (casing :full))
 | 
| 529 | 571 |    "Capitalize String using the Common Lisp word-break algorithm to find
 | 
| 530 | 572 |    the words in String.  The beginning is capitalized depending on the
 | 
| ... | ... | @@ -599,7 +641,7 @@ | 
| 599 | 641 |    (if unicode-word-break
 | 
| 600 | 642 |        (string-capitalize-unicode string :start start :end end :casing casing)
 | 
| 601 | 643 |        (if (eq casing :simple)
 | 
| 602 | -	  (cl:string-capitalize string :start start :end end)
 | 
|
| 644 | +	  (cl:string-capitalize-simple string :start start :end end)
 | 
|
| 603 | 645 |  	  (string-capitalize-full string :start start :end end :casing casing))))
 | 
| 604 | 646 | |
| 605 | 647 | 
| ... | ... | @@ -3892,7 +3892,7 @@ msgid "" | 
| 3892 | 3892 |  "  upper case alphabetic characters converted to lowercase."
 | 
| 3893 | 3893 |  msgstr ""
 | 
| 3894 | 3894 | |
| 3895 | -#: src/code/string.lisp
 | 
|
| 3895 | +#: src/code/unicode.lisp src/code/string.lisp
 | 
|
| 3896 | 3896 |  msgid ""
 | 
| 3897 | 3897 |  "Given a string, returns a copy of the string with the first\n"
 | 
| 3898 | 3898 |  "  character of each ``word'' converted to upper-case, and remaining\n"
 | 
| ... | ... | @@ -58,3 +58,36 @@ | 
| 58 | 58 |             when (char/= actual (char-downcase expected))
 | 
| 59 | 59 |               collect (list (char-downcase expected)
 | 
| 60 | 60 |                             (char-code actual))))))
 | 
| 61 | +  | 
|
| 62 | +(define-test string-capitalize
 | 
|
| 63 | +    (:tag :issues)
 | 
|
| 64 | +  (let* ((s (string-capitalize
 | 
|
| 65 | +             ;; #\Latin_Small_Letter_Dz and #\Latin_Small_Letter_Lj
 | 
|
| 66 | +             ;; #have Unicode titlecase characters that differ from
 | 
|
| 67 | +             ;; CHAR-UPCASE.  This tests that STRING-CAPITALIZE use
 | 
|
| 68 | +             ;; CHAR-UPCASE to produce the capitalization.
 | 
|
| 69 | +             (coerce
 | 
|
| 70 | +              '(#\Latin_Small_Letter_Dz
 | 
|
| 71 | +                #\a #\b
 | 
|
| 72 | +                #\space
 | 
|
| 73 | +                #\Latin_Small_Letter_Lj
 | 
|
| 74 | +                #\A #\B)
 | 
|
| 75 | +              'string)))
 | 
|
| 76 | +         (expected
 | 
|
| 77 | +           ;; Manually convert the test string by calling CHAR-UPCASE
 | 
|
| 78 | +           ;; or CHAR-DOWNCASE (or IDENTITY) to get the desired
 | 
|
| 79 | +           ;; capitalized string.
 | 
|
| 80 | +           (map 'list
 | 
|
| 81 | +                #'(lambda (c f)
 | 
|
| 82 | +                    (funcall f c))
 | 
|
| 83 | +                s
 | 
|
| 84 | +                (list #'char-upcase
 | 
|
| 85 | +                      #'char-downcase
 | 
|
| 86 | +                      #'char-downcase
 | 
|
| 87 | +                      #'identity
 | 
|
| 88 | +                      #'char-upcase
 | 
|
| 89 | +                      #'char-downcase
 | 
|
| 90 | +                      #'char-downcase))))
 | 
|
| 91 | +    (assert-equal
 | 
|
| 92 | +     (map 'list #'char-name expected)
 | 
|
| 93 | +     (map 'list #'char-name s)))) |