Raymond Toy pushed to branch master at cmucl / cmucl
Commits: 5a037b19 by Raymond Toy at 2024-05-31T08:56:12-07:00 String-capitalize should use char-upcase, not lisp:char-titlecase
We were using lisp:;char-titlecase to capitalize the string, but we really should use char-upcase. However, to preserve this functionality, we moved the functions to the `unicode` package.
Added a test for this.
- - - - -
4 changed files:
- src/code/string.lisp - src/code/unicode.lisp - src/i18n/locale/cmucl.pot - tests/string.lisp
Changes:
===================================== src/code/string.lisp ===================================== @@ -702,6 +702,7 @@ (setf (schar newstring new-index) (schar string index))) newstring))))
+#+nil (defun string-capitalize (string &key (start 0) end) _N"Given a string, returns a copy of the string with the first character of each ``word'' converted to upper-case, and remaining @@ -744,8 +745,50 @@ (setf (schar newstring new-index) (schar string index))) newstring))))
+(defun string-capitalize (string &key (start 0) end) + _N"Given a string, returns a copy of the string with the first + character of each ``word'' converted to upper-case, and remaining + chars in the word converted to lower case. A ``word'' is defined + to be a string of case-modifiable characters delimited by + non-case-modifiable chars." + (declare (fixnum start)) + (let* ((string (if (stringp string) string (string string))) + (slen (length string))) + (declare (fixnum slen)) + (with-one-string string start end offset + (let ((offset-slen (+ slen offset)) + (newstring (make-string slen))) + (declare (fixnum offset-slen)) + (do ((index offset (1+ index)) + (new-index 0 (1+ new-index))) + ((= index start)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + (do ((index start (1+ index)) + (new-index (- start offset) (1+ new-index)) + (newword t) + (char ())) + ((= index (the fixnum end))) + (declare (fixnum index new-index)) + (setq char (schar string index)) + (cond ((not (alphanumericp char)) + (setq newword t)) + (newword + ;;char is first case-modifiable after non-case-modifiable + (setq char (char-upcase char)) + (setq newword ())) + ;;char is case-modifiable, but not first + (t (setq char (char-downcase char)))) + (setf (schar newstring new-index) char)) + (do ((index end (1+ index)) + (new-index (- (the fixnum end) offset) (1+ new-index))) + ((= index offset-slen)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + newstring)))) + (defun nstring-upcase (string &key (start 0) end) - "Given a string, returns that string with all lower case alphabetic + _N"Given a string, returns that string with all lower case alphabetic characters converted to uppercase." (declare (fixnum start)) (let ((save-header string)) @@ -769,7 +812,7 @@ save-header))
(defun nstring-downcase (string &key (start 0) end) - "Given a string, returns that string with all upper case alphabetic + _N"Given a string, returns that string with all upper case alphabetic characters converted to lowercase." (declare (fixnum start)) (let ((save-header string)) @@ -792,6 +835,7 @@ (setf (schar string (incf index)) lo)))))) save-header))
+#+nil (defun nstring-capitalize (string &key (start 0) end) "Given a string, returns that string with the first character of each ``word'' converted to upper-case, and remaining @@ -817,6 +861,31 @@ (setf (schar string index) (char-downcase char)))))) save-header))
+(defun nstring-capitalize (string &key (start 0) end) + _N"Given a string, returns that string with the first + character of each ``word'' converted to upper-case, and remaining + chars in the word converted to lower case. A ``word'' is defined + to be a string of case-modifiable characters delimited by + non-case-modifiable chars." + (declare (fixnum start)) + (let ((save-header string)) + (with-one-string string start end offset + (do ((index start (1+ index)) + (newword t) + (char ())) + ((= index (the fixnum end))) + (declare (fixnum index)) + (setq char (schar string index)) + (cond ((not (alphanumericp char)) + (setq newword t)) + (newword + ;;char is first case-modifiable after non-case-modifiable + (setf (schar string index) (char-upcase char)) + (setq newword ())) + (t + (setf (schar string index) (char-downcase char)))))) + save-header)) +
#+unicode (progn
===================================== src/code/unicode.lisp ===================================== @@ -441,6 +441,48 @@ :end next))) (write-string string result :start end :end offset-slen))))))
+(defun string-capitalize-simple (string &key (start 0) end) + _N"Given a string, returns a copy of the string with the first + character of each ``word'' converted to upper-case, and remaining + chars in the word converted to lower case. A ``word'' is defined + to be a string of case-modifiable characters delimited by + non-case-modifiable chars." + (declare (fixnum start)) + (let* ((string (if (stringp string) string (string string))) + (slen (length string))) + (declare (fixnum slen)) + (with-one-string string start end offset + (let ((offset-slen (+ slen offset)) + (newstring (make-string slen))) + (declare (fixnum offset-slen)) + (do ((index offset (1+ index)) + (new-index 0 (1+ new-index))) + ((= index start)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + (do ((index start (1+ index)) + (new-index (- start offset) (1+ new-index)) + (newword t) + (char ())) + ((= index (the fixnum end))) + (declare (fixnum index new-index)) + (setq char (schar string index)) + (cond ((not (alphanumericp char)) + (setq newword t)) + (newword + ;;char is first case-modifiable after non-case-modifiable + (setq char (char-titlecase char)) + (setq newword ())) + ;;char is case-modifiable, but not first + (t (setq char (char-downcase char)))) + (setf (schar newstring new-index) char)) + (do ((index end (1+ index)) + (new-index (- (the fixnum end) offset) (1+ new-index))) + ((= index offset-slen)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + newstring)))) + (defun string-capitalize-full (string &key (start 0) end (casing :full)) "Capitalize String using the Common Lisp word-break algorithm to find the words in String. The beginning is capitalized depending on the @@ -515,7 +557,7 @@ (if unicode-word-break (string-capitalize-unicode string :start start :end end :casing casing) (if (eq casing :simple) - (cl:string-capitalize string :start start :end end) + (string-capitalize-simple string :start start :end end) (string-capitalize-full string :start start :end end :casing casing))))
===================================== src/i18n/locale/cmucl.pot ===================================== @@ -3892,7 +3892,7 @@ msgid "" " upper case alphabetic characters converted to lowercase." msgstr ""
-#: src/code/string.lisp +#: src/code/unicode.lisp src/code/string.lisp msgid "" "Given a string, returns a copy of the string with the first\n" " character of each ``word'' converted to upper-case, and remaining\n"
===================================== tests/string.lisp ===================================== @@ -59,3 +59,35 @@ when (char/= actual (char-downcase expected)) collect (list (char-downcase expected) (char-code actual)))))) + +(define-test string-capitalize + (:tag :issues) + (let* ((s + ;; This string contains a couple of characters where + ;; Unicode has a titlecase version of the character. We + ;; want to make sure we use char-upcase to capitalize the + ;; string instead of lisp::char-titlecse + (coerce + '(#\Latin_Small_Letter_Dz + #\a #\b + #\space + #\Latin_Small_Letter_Lj + #\A #\B) + 'string)) + (expected + ;; Manually convert S to a capitalized string using + ;; char-upcase/downcase. + (map 'string + #'(lambda (ch f) + (funcall f ch)) + s + (list #'char-upcase + #'char-downcase + #'char-downcase + #'char-downcase + #'char-upcase + #'char-downcase + #'char-downcase)))) + (assert-equal + (map 'list #'char-name expected) + (map 'list #'char-name (string-capitalize s)))))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/5a037b19ff7be80cca1fdd1f...