Raymond Toy pushed to branch issue-323-cl-string-casing at cmucl / cmucl
Commits:
-
3557fc31
by Raymond Toy at 2024-05-31T16:20:11-07:00
-
163cafa4
by Raymond Toy at 2024-05-31T16:23:13-07:00
4 changed files:
Changes:
... | ... | @@ -706,7 +706,7 @@ |
706 | 706 | (setq newword t))
|
707 | 707 | (newword
|
708 | 708 | ;;char is first case-modifiable after non-case-modifiable
|
709 | - (setq char (char-titlecase char))
|
|
709 | + (setq char (char-upcase char))
|
|
710 | 710 | (setq newword ()))
|
711 | 711 | ;;char is case-modifiable, but not first
|
712 | 712 | (t (setq char (char-downcase char))))
|
... | ... | @@ -525,6 +525,48 @@ |
525 | 525 | :end next)))
|
526 | 526 | (write-string string result :start end :end offset-slen))))))
|
527 | 527 | |
528 | +(defun string-capitalize-simple (string &key (start 0) end)
|
|
529 | + _N"Given a string, returns a copy of the string with the first
|
|
530 | + character of each ``word'' converted to upper-case, and remaining
|
|
531 | + chars in the word converted to lower case. A ``word'' is defined
|
|
532 | + to be a string of case-modifiable characters delimited by
|
|
533 | + non-case-modifiable chars."
|
|
534 | + (declare (fixnum start))
|
|
535 | + (let* ((string (if (stringp string) string (string string)))
|
|
536 | + (slen (length string)))
|
|
537 | + (declare (fixnum slen))
|
|
538 | + (with-one-string string start end offset
|
|
539 | + (let ((offset-slen (+ slen offset))
|
|
540 | + (newstring (make-string slen)))
|
|
541 | + (declare (fixnum offset-slen))
|
|
542 | + (do ((index offset (1+ index))
|
|
543 | + (new-index 0 (1+ new-index)))
|
|
544 | + ((= index start))
|
|
545 | + (declare (fixnum index new-index))
|
|
546 | + (setf (schar newstring new-index) (schar string index)))
|
|
547 | + (do ((index start (1+ index))
|
|
548 | + (new-index (- start offset) (1+ new-index))
|
|
549 | + (newword t)
|
|
550 | + (char ()))
|
|
551 | + ((= index (the fixnum end)))
|
|
552 | + (declare (fixnum index new-index))
|
|
553 | + (setq char (schar string index))
|
|
554 | + (cond ((not (alphanumericp char))
|
|
555 | + (setq newword t))
|
|
556 | + (newword
|
|
557 | + ;;char is first case-modifiable after non-case-modifiable
|
|
558 | + (setq char (char-titlecase char))
|
|
559 | + (setq newword ()))
|
|
560 | + ;;char is case-modifiable, but not first
|
|
561 | + (t (setq char (char-downcase char))))
|
|
562 | + (setf (schar newstring new-index) char))
|
|
563 | + (do ((index end (1+ index))
|
|
564 | + (new-index (- (the fixnum end) offset) (1+ new-index)))
|
|
565 | + ((= index offset-slen))
|
|
566 | + (declare (fixnum index new-index))
|
|
567 | + (setf (schar newstring new-index) (schar string index)))
|
|
568 | + newstring))))
|
|
569 | + |
|
528 | 570 | (defun string-capitalize-full (string &key (start 0) end (casing :full))
|
529 | 571 | "Capitalize String using the Common Lisp word-break algorithm to find
|
530 | 572 | the words in String. The beginning is capitalized depending on the
|
... | ... | @@ -599,7 +641,7 @@ |
599 | 641 | (if unicode-word-break
|
600 | 642 | (string-capitalize-unicode string :start start :end end :casing casing)
|
601 | 643 | (if (eq casing :simple)
|
602 | - (cl:string-capitalize string :start start :end end)
|
|
644 | + (cl:string-capitalize-simple string :start start :end end)
|
|
603 | 645 | (string-capitalize-full string :start start :end end :casing casing))))
|
604 | 646 | |
605 | 647 |
... | ... | @@ -3892,7 +3892,7 @@ msgid "" |
3892 | 3892 | " upper case alphabetic characters converted to lowercase."
|
3893 | 3893 | msgstr ""
|
3894 | 3894 | |
3895 | -#: src/code/string.lisp
|
|
3895 | +#: src/code/unicode.lisp src/code/string.lisp
|
|
3896 | 3896 | msgid ""
|
3897 | 3897 | "Given a string, returns a copy of the string with the first\n"
|
3898 | 3898 | " character of each ``word'' converted to upper-case, and remaining\n"
|
... | ... | @@ -58,3 +58,36 @@ |
58 | 58 | when (char/= actual (char-downcase expected))
|
59 | 59 | collect (list (char-downcase expected)
|
60 | 60 | (char-code actual))))))
|
61 | + |
|
62 | +(define-test string-capitalize
|
|
63 | + (:tag :issues)
|
|
64 | + (let* ((s (string-capitalize
|
|
65 | + ;; #\Latin_Small_Letter_Dz and #\Latin_Small_Letter_Lj
|
|
66 | + ;; #have Unicode titlecase characters that differ from
|
|
67 | + ;; CHAR-UPCASE. This tests that STRING-CAPITALIZE use
|
|
68 | + ;; CHAR-UPCASE to produce the capitalization.
|
|
69 | + (coerce
|
|
70 | + '(#\Latin_Small_Letter_Dz
|
|
71 | + #\a #\b
|
|
72 | + #\space
|
|
73 | + #\Latin_Small_Letter_Lj
|
|
74 | + #\A #\B)
|
|
75 | + 'string)))
|
|
76 | + (expected
|
|
77 | + ;; Manually convert the test string by calling CHAR-UPCASE
|
|
78 | + ;; or CHAR-DOWNCASE (or IDENTITY) to get the desired
|
|
79 | + ;; capitalized string.
|
|
80 | + (map 'list
|
|
81 | + #'(lambda (c f)
|
|
82 | + (funcall f c))
|
|
83 | + s
|
|
84 | + (list #'char-upcase
|
|
85 | + #'char-downcase
|
|
86 | + #'char-downcase
|
|
87 | + #'identity
|
|
88 | + #'char-upcase
|
|
89 | + #'char-downcase
|
|
90 | + #'char-downcase))))
|
|
91 | + (assert-equal
|
|
92 | + (map 'list #'char-name expected)
|
|
93 | + (map 'list #'char-name s)))) |