Raymond Toy pushed to branch issue-323-cl-string-casing at cmucl / cmucl
Commits: 3557fc31 by Raymond Toy at 2024-05-31T16:20:11-07:00 CL-compliant string-capitalize
Previously, when we capitalized a word, we used the titlecase. However it should be upper case. Make it so, but move the original implementation to unicode.lisp.
Add a test that we do use uppercase instead of titlecase.
- - - - - 163cafa4 by Raymond Toy at 2024-05-31T16:23:13-07:00 Rename string-capitalize to string-capitalize-simple.
Forgot to rename the old version so it's clear it's not CL string-capitalize. Also call it in unicode:string-capitalize when the casing is simple.
- - - - -
4 changed files:
- src/code/string.lisp - src/code/unicode.lisp - src/i18n/locale/cmucl.pot - tests/string.lisp
Changes:
===================================== src/code/string.lisp ===================================== @@ -706,7 +706,7 @@ (setq newword t)) (newword ;;char is first case-modifiable after non-case-modifiable - (setq char (char-titlecase char)) + (setq char (char-upcase char)) (setq newword ())) ;;char is case-modifiable, but not first (t (setq char (char-downcase char))))
===================================== src/code/unicode.lisp ===================================== @@ -525,6 +525,48 @@ :end next))) (write-string string result :start end :end offset-slen))))))
+(defun string-capitalize-simple (string &key (start 0) end) + _N"Given a string, returns a copy of the string with the first + character of each ``word'' converted to upper-case, and remaining + chars in the word converted to lower case. A ``word'' is defined + to be a string of case-modifiable characters delimited by + non-case-modifiable chars." + (declare (fixnum start)) + (let* ((string (if (stringp string) string (string string))) + (slen (length string))) + (declare (fixnum slen)) + (with-one-string string start end offset + (let ((offset-slen (+ slen offset)) + (newstring (make-string slen))) + (declare (fixnum offset-slen)) + (do ((index offset (1+ index)) + (new-index 0 (1+ new-index))) + ((= index start)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + (do ((index start (1+ index)) + (new-index (- start offset) (1+ new-index)) + (newword t) + (char ())) + ((= index (the fixnum end))) + (declare (fixnum index new-index)) + (setq char (schar string index)) + (cond ((not (alphanumericp char)) + (setq newword t)) + (newword + ;;char is first case-modifiable after non-case-modifiable + (setq char (char-titlecase char)) + (setq newword ())) + ;;char is case-modifiable, but not first + (t (setq char (char-downcase char)))) + (setf (schar newstring new-index) char)) + (do ((index end (1+ index)) + (new-index (- (the fixnum end) offset) (1+ new-index))) + ((= index offset-slen)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + newstring)))) + (defun string-capitalize-full (string &key (start 0) end (casing :full)) "Capitalize String using the Common Lisp word-break algorithm to find the words in String. The beginning is capitalized depending on the @@ -599,7 +641,7 @@ (if unicode-word-break (string-capitalize-unicode string :start start :end end :casing casing) (if (eq casing :simple) - (cl:string-capitalize string :start start :end end) + (cl:string-capitalize-simple string :start start :end end) (string-capitalize-full string :start start :end end :casing casing))))
===================================== src/i18n/locale/cmucl.pot ===================================== @@ -3892,7 +3892,7 @@ msgid "" " upper case alphabetic characters converted to lowercase." msgstr ""
-#: src/code/string.lisp +#: src/code/unicode.lisp src/code/string.lisp msgid "" "Given a string, returns a copy of the string with the first\n" " character of each ``word'' converted to upper-case, and remaining\n"
===================================== tests/string.lisp ===================================== @@ -58,3 +58,36 @@ when (char/= actual (char-downcase expected)) collect (list (char-downcase expected) (char-code actual)))))) + +(define-test string-capitalize + (:tag :issues) + (let* ((s (string-capitalize + ;; #\Latin_Small_Letter_Dz and #\Latin_Small_Letter_Lj + ;; #have Unicode titlecase characters that differ from + ;; CHAR-UPCASE. This tests that STRING-CAPITALIZE use + ;; CHAR-UPCASE to produce the capitalization. + (coerce + '(#\Latin_Small_Letter_Dz + #\a #\b + #\space + #\Latin_Small_Letter_Lj + #\A #\B) + 'string))) + (expected + ;; Manually convert the test string by calling CHAR-UPCASE + ;; or CHAR-DOWNCASE (or IDENTITY) to get the desired + ;; capitalized string. + (map 'list + #'(lambda (c f) + (funcall f c)) + s + (list #'char-upcase + #'char-downcase + #'char-downcase + #'identity + #'char-upcase + #'char-downcase + #'char-downcase)))) + (assert-equal + (map 'list #'char-name expected) + (map 'list #'char-name s))))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/483dfe3999bd7db0cee36ef...