Raymond Toy pushed to branch issue-323-cl-string-casing at cmucl / cmucl
Commits:
-
3557fc31
by Raymond Toy at 2024-05-31T16:20:11-07:00
-
163cafa4
by Raymond Toy at 2024-05-31T16:23:13-07:00
4 changed files:
Changes:
| ... | ... | @@ -706,7 +706,7 @@ |
| 706 | 706 | (setq newword t))
|
| 707 | 707 | (newword
|
| 708 | 708 | ;;char is first case-modifiable after non-case-modifiable
|
| 709 | - (setq char (char-titlecase char))
|
|
| 709 | + (setq char (char-upcase char))
|
|
| 710 | 710 | (setq newword ()))
|
| 711 | 711 | ;;char is case-modifiable, but not first
|
| 712 | 712 | (t (setq char (char-downcase char))))
|
| ... | ... | @@ -525,6 +525,48 @@ |
| 525 | 525 | :end next)))
|
| 526 | 526 | (write-string string result :start end :end offset-slen))))))
|
| 527 | 527 | |
| 528 | +(defun string-capitalize-simple (string &key (start 0) end)
|
|
| 529 | + _N"Given a string, returns a copy of the string with the first
|
|
| 530 | + character of each ``word'' converted to upper-case, and remaining
|
|
| 531 | + chars in the word converted to lower case. A ``word'' is defined
|
|
| 532 | + to be a string of case-modifiable characters delimited by
|
|
| 533 | + non-case-modifiable chars."
|
|
| 534 | + (declare (fixnum start))
|
|
| 535 | + (let* ((string (if (stringp string) string (string string)))
|
|
| 536 | + (slen (length string)))
|
|
| 537 | + (declare (fixnum slen))
|
|
| 538 | + (with-one-string string start end offset
|
|
| 539 | + (let ((offset-slen (+ slen offset))
|
|
| 540 | + (newstring (make-string slen)))
|
|
| 541 | + (declare (fixnum offset-slen))
|
|
| 542 | + (do ((index offset (1+ index))
|
|
| 543 | + (new-index 0 (1+ new-index)))
|
|
| 544 | + ((= index start))
|
|
| 545 | + (declare (fixnum index new-index))
|
|
| 546 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 547 | + (do ((index start (1+ index))
|
|
| 548 | + (new-index (- start offset) (1+ new-index))
|
|
| 549 | + (newword t)
|
|
| 550 | + (char ()))
|
|
| 551 | + ((= index (the fixnum end)))
|
|
| 552 | + (declare (fixnum index new-index))
|
|
| 553 | + (setq char (schar string index))
|
|
| 554 | + (cond ((not (alphanumericp char))
|
|
| 555 | + (setq newword t))
|
|
| 556 | + (newword
|
|
| 557 | + ;;char is first case-modifiable after non-case-modifiable
|
|
| 558 | + (setq char (char-titlecase char))
|
|
| 559 | + (setq newword ()))
|
|
| 560 | + ;;char is case-modifiable, but not first
|
|
| 561 | + (t (setq char (char-downcase char))))
|
|
| 562 | + (setf (schar newstring new-index) char))
|
|
| 563 | + (do ((index end (1+ index))
|
|
| 564 | + (new-index (- (the fixnum end) offset) (1+ new-index)))
|
|
| 565 | + ((= index offset-slen))
|
|
| 566 | + (declare (fixnum index new-index))
|
|
| 567 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 568 | + newstring))))
|
|
| 569 | + |
|
| 528 | 570 | (defun string-capitalize-full (string &key (start 0) end (casing :full))
|
| 529 | 571 | "Capitalize String using the Common Lisp word-break algorithm to find
|
| 530 | 572 | the words in String. The beginning is capitalized depending on the
|
| ... | ... | @@ -599,7 +641,7 @@ |
| 599 | 641 | (if unicode-word-break
|
| 600 | 642 | (string-capitalize-unicode string :start start :end end :casing casing)
|
| 601 | 643 | (if (eq casing :simple)
|
| 602 | - (cl:string-capitalize string :start start :end end)
|
|
| 644 | + (cl:string-capitalize-simple string :start start :end end)
|
|
| 603 | 645 | (string-capitalize-full string :start start :end end :casing casing))))
|
| 604 | 646 | |
| 605 | 647 |
| ... | ... | @@ -3892,7 +3892,7 @@ msgid "" |
| 3892 | 3892 | " upper case alphabetic characters converted to lowercase."
|
| 3893 | 3893 | msgstr ""
|
| 3894 | 3894 | |
| 3895 | -#: src/code/string.lisp
|
|
| 3895 | +#: src/code/unicode.lisp src/code/string.lisp
|
|
| 3896 | 3896 | msgid ""
|
| 3897 | 3897 | "Given a string, returns a copy of the string with the first\n"
|
| 3898 | 3898 | " character of each ``word'' converted to upper-case, and remaining\n"
|
| ... | ... | @@ -58,3 +58,36 @@ |
| 58 | 58 | when (char/= actual (char-downcase expected))
|
| 59 | 59 | collect (list (char-downcase expected)
|
| 60 | 60 | (char-code actual))))))
|
| 61 | + |
|
| 62 | +(define-test string-capitalize
|
|
| 63 | + (:tag :issues)
|
|
| 64 | + (let* ((s (string-capitalize
|
|
| 65 | + ;; #\Latin_Small_Letter_Dz and #\Latin_Small_Letter_Lj
|
|
| 66 | + ;; #have Unicode titlecase characters that differ from
|
|
| 67 | + ;; CHAR-UPCASE. This tests that STRING-CAPITALIZE use
|
|
| 68 | + ;; CHAR-UPCASE to produce the capitalization.
|
|
| 69 | + (coerce
|
|
| 70 | + '(#\Latin_Small_Letter_Dz
|
|
| 71 | + #\a #\b
|
|
| 72 | + #\space
|
|
| 73 | + #\Latin_Small_Letter_Lj
|
|
| 74 | + #\A #\B)
|
|
| 75 | + 'string)))
|
|
| 76 | + (expected
|
|
| 77 | + ;; Manually convert the test string by calling CHAR-UPCASE
|
|
| 78 | + ;; or CHAR-DOWNCASE (or IDENTITY) to get the desired
|
|
| 79 | + ;; capitalized string.
|
|
| 80 | + (map 'list
|
|
| 81 | + #'(lambda (c f)
|
|
| 82 | + (funcall f c))
|
|
| 83 | + s
|
|
| 84 | + (list #'char-upcase
|
|
| 85 | + #'char-downcase
|
|
| 86 | + #'char-downcase
|
|
| 87 | + #'identity
|
|
| 88 | + #'char-upcase
|
|
| 89 | + #'char-downcase
|
|
| 90 | + #'char-downcase))))
|
|
| 91 | + (assert-equal
|
|
| 92 | + (map 'list #'char-name expected)
|
|
| 93 | + (map 'list #'char-name s)))) |