[Git][cmucl/cmucl][master] String-capitalize should use char-upcase, not lisp:char-titlecase
Raymond Toy pushed to branch master at cmucl / cmucl Commits: 5a037b19 by Raymond Toy at 2024-05-31T08:56:12-07:00 String-capitalize should use char-upcase, not lisp:char-titlecase We were using lisp:;char-titlecase to capitalize the string, but we really should use char-upcase. However, to preserve this functionality, we moved the functions to the `unicode` package. Added a test for this. - - - - - 4 changed files: - src/code/string.lisp - src/code/unicode.lisp - src/i18n/locale/cmucl.pot - tests/string.lisp Changes: ===================================== src/code/string.lisp ===================================== @@ -702,6 +702,7 @@ (setf (schar newstring new-index) (schar string index))) newstring)))) +#+nil (defun string-capitalize (string &key (start 0) end) _N"Given a string, returns a copy of the string with the first character of each ``word'' converted to upper-case, and remaining @@ -744,8 +745,50 @@ (setf (schar newstring new-index) (schar string index))) newstring)))) +(defun string-capitalize (string &key (start 0) end) + _N"Given a string, returns a copy of the string with the first + character of each ``word'' converted to upper-case, and remaining + chars in the word converted to lower case. A ``word'' is defined + to be a string of case-modifiable characters delimited by + non-case-modifiable chars." + (declare (fixnum start)) + (let* ((string (if (stringp string) string (string string))) + (slen (length string))) + (declare (fixnum slen)) + (with-one-string string start end offset + (let ((offset-slen (+ slen offset)) + (newstring (make-string slen))) + (declare (fixnum offset-slen)) + (do ((index offset (1+ index)) + (new-index 0 (1+ new-index))) + ((= index start)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + (do ((index start (1+ index)) + (new-index (- start offset) (1+ new-index)) + (newword t) + (char ())) + ((= index (the fixnum end))) + (declare (fixnum index new-index)) + (setq char (schar string index)) + (cond ((not (alphanumericp char)) + (setq newword t)) + (newword + ;;char is first case-modifiable after non-case-modifiable + (setq char (char-upcase char)) + (setq newword ())) + ;;char is case-modifiable, but not first + (t (setq char (char-downcase char)))) + (setf (schar newstring new-index) char)) + (do ((index end (1+ index)) + (new-index (- (the fixnum end) offset) (1+ new-index))) + ((= index offset-slen)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + newstring)))) + (defun nstring-upcase (string &key (start 0) end) - "Given a string, returns that string with all lower case alphabetic + _N"Given a string, returns that string with all lower case alphabetic characters converted to uppercase." (declare (fixnum start)) (let ((save-header string)) @@ -769,7 +812,7 @@ save-header)) (defun nstring-downcase (string &key (start 0) end) - "Given a string, returns that string with all upper case alphabetic + _N"Given a string, returns that string with all upper case alphabetic characters converted to lowercase." (declare (fixnum start)) (let ((save-header string)) @@ -792,6 +835,7 @@ (setf (schar string (incf index)) lo)))))) save-header)) +#+nil (defun nstring-capitalize (string &key (start 0) end) "Given a string, returns that string with the first character of each ``word'' converted to upper-case, and remaining @@ -817,6 +861,31 @@ (setf (schar string index) (char-downcase char)))))) save-header)) +(defun nstring-capitalize (string &key (start 0) end) + _N"Given a string, returns that string with the first + character of each ``word'' converted to upper-case, and remaining + chars in the word converted to lower case. A ``word'' is defined + to be a string of case-modifiable characters delimited by + non-case-modifiable chars." + (declare (fixnum start)) + (let ((save-header string)) + (with-one-string string start end offset + (do ((index start (1+ index)) + (newword t) + (char ())) + ((= index (the fixnum end))) + (declare (fixnum index)) + (setq char (schar string index)) + (cond ((not (alphanumericp char)) + (setq newword t)) + (newword + ;;char is first case-modifiable after non-case-modifiable + (setf (schar string index) (char-upcase char)) + (setq newword ())) + (t + (setf (schar string index) (char-downcase char)))))) + save-header)) + #+unicode (progn ===================================== src/code/unicode.lisp ===================================== @@ -441,6 +441,48 @@ :end next))) (write-string string result :start end :end offset-slen)))))) +(defun string-capitalize-simple (string &key (start 0) end) + _N"Given a string, returns a copy of the string with the first + character of each ``word'' converted to upper-case, and remaining + chars in the word converted to lower case. A ``word'' is defined + to be a string of case-modifiable characters delimited by + non-case-modifiable chars." + (declare (fixnum start)) + (let* ((string (if (stringp string) string (string string))) + (slen (length string))) + (declare (fixnum slen)) + (with-one-string string start end offset + (let ((offset-slen (+ slen offset)) + (newstring (make-string slen))) + (declare (fixnum offset-slen)) + (do ((index offset (1+ index)) + (new-index 0 (1+ new-index))) + ((= index start)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + (do ((index start (1+ index)) + (new-index (- start offset) (1+ new-index)) + (newword t) + (char ())) + ((= index (the fixnum end))) + (declare (fixnum index new-index)) + (setq char (schar string index)) + (cond ((not (alphanumericp char)) + (setq newword t)) + (newword + ;;char is first case-modifiable after non-case-modifiable + (setq char (char-titlecase char)) + (setq newword ())) + ;;char is case-modifiable, but not first + (t (setq char (char-downcase char)))) + (setf (schar newstring new-index) char)) + (do ((index end (1+ index)) + (new-index (- (the fixnum end) offset) (1+ new-index))) + ((= index offset-slen)) + (declare (fixnum index new-index)) + (setf (schar newstring new-index) (schar string index))) + newstring)))) + (defun string-capitalize-full (string &key (start 0) end (casing :full)) "Capitalize String using the Common Lisp word-break algorithm to find the words in String. The beginning is capitalized depending on the @@ -515,7 +557,7 @@ (if unicode-word-break (string-capitalize-unicode string :start start :end end :casing casing) (if (eq casing :simple) - (cl:string-capitalize string :start start :end end) + (string-capitalize-simple string :start start :end end) (string-capitalize-full string :start start :end end :casing casing)))) ===================================== src/i18n/locale/cmucl.pot ===================================== @@ -3892,7 +3892,7 @@ msgid "" " upper case alphabetic characters converted to lowercase." msgstr "" -#: src/code/string.lisp +#: src/code/unicode.lisp src/code/string.lisp msgid "" "Given a string, returns a copy of the string with the first\n" " character of each ``word'' converted to upper-case, and remaining\n" ===================================== tests/string.lisp ===================================== @@ -59,3 +59,35 @@ when (char/= actual (char-downcase expected)) collect (list (char-downcase expected) (char-code actual)))))) + +(define-test string-capitalize + (:tag :issues) + (let* ((s + ;; This string contains a couple of characters where + ;; Unicode has a titlecase version of the character. We + ;; want to make sure we use char-upcase to capitalize the + ;; string instead of lisp::char-titlecse + (coerce + '(#\Latin_Small_Letter_Dz + #\a #\b + #\space + #\Latin_Small_Letter_Lj + #\A #\B) + 'string)) + (expected + ;; Manually convert S to a capitalized string using + ;; char-upcase/downcase. + (map 'string + #'(lambda (ch f) + (funcall f ch)) + s + (list #'char-upcase + #'char-downcase + #'char-downcase + #'char-downcase + #'char-upcase + #'char-downcase + #'char-downcase)))) + (assert-equal + (map 'list #'char-name expected) + (map 'list #'char-name (string-capitalize s))))) View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/5a037b19ff7be80cca1fdd1f... -- This project does not include diff previews in email notifications. View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/5a037b19ff7be80cca1fdd1f... You're receiving this email because of your account on gitlab.common-lisp.net.
participants (1)
-
Raymond Toy (@rtoy)