Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
-
5a037b19
by Raymond Toy at 2024-05-31T08:56:12-07:00
4 changed files:
Changes:
... | ... | @@ -702,6 +702,7 @@ |
702 | 702 | (setf (schar newstring new-index) (schar string index)))
|
703 | 703 | newstring))))
|
704 | 704 | |
705 | +#+nil
|
|
705 | 706 | (defun string-capitalize (string &key (start 0) end)
|
706 | 707 | _N"Given a string, returns a copy of the string with the first
|
707 | 708 | character of each ``word'' converted to upper-case, and remaining
|
... | ... | @@ -744,8 +745,50 @@ |
744 | 745 | (setf (schar newstring new-index) (schar string index)))
|
745 | 746 | newstring))))
|
746 | 747 | |
748 | +(defun string-capitalize (string &key (start 0) end)
|
|
749 | + _N"Given a string, returns a copy of the string with the first
|
|
750 | + character of each ``word'' converted to upper-case, and remaining
|
|
751 | + chars in the word converted to lower case. A ``word'' is defined
|
|
752 | + to be a string of case-modifiable characters delimited by
|
|
753 | + non-case-modifiable chars."
|
|
754 | + (declare (fixnum start))
|
|
755 | + (let* ((string (if (stringp string) string (string string)))
|
|
756 | + (slen (length string)))
|
|
757 | + (declare (fixnum slen))
|
|
758 | + (with-one-string string start end offset
|
|
759 | + (let ((offset-slen (+ slen offset))
|
|
760 | + (newstring (make-string slen)))
|
|
761 | + (declare (fixnum offset-slen))
|
|
762 | + (do ((index offset (1+ index))
|
|
763 | + (new-index 0 (1+ new-index)))
|
|
764 | + ((= index start))
|
|
765 | + (declare (fixnum index new-index))
|
|
766 | + (setf (schar newstring new-index) (schar string index)))
|
|
767 | + (do ((index start (1+ index))
|
|
768 | + (new-index (- start offset) (1+ new-index))
|
|
769 | + (newword t)
|
|
770 | + (char ()))
|
|
771 | + ((= index (the fixnum end)))
|
|
772 | + (declare (fixnum index new-index))
|
|
773 | + (setq char (schar string index))
|
|
774 | + (cond ((not (alphanumericp char))
|
|
775 | + (setq newword t))
|
|
776 | + (newword
|
|
777 | + ;;char is first case-modifiable after non-case-modifiable
|
|
778 | + (setq char (char-upcase char))
|
|
779 | + (setq newword ()))
|
|
780 | + ;;char is case-modifiable, but not first
|
|
781 | + (t (setq char (char-downcase char))))
|
|
782 | + (setf (schar newstring new-index) char))
|
|
783 | + (do ((index end (1+ index))
|
|
784 | + (new-index (- (the fixnum end) offset) (1+ new-index)))
|
|
785 | + ((= index offset-slen))
|
|
786 | + (declare (fixnum index new-index))
|
|
787 | + (setf (schar newstring new-index) (schar string index)))
|
|
788 | + newstring))))
|
|
789 | + |
|
747 | 790 | (defun nstring-upcase (string &key (start 0) end)
|
748 | - "Given a string, returns that string with all lower case alphabetic
|
|
791 | + _N"Given a string, returns that string with all lower case alphabetic
|
|
749 | 792 | characters converted to uppercase."
|
750 | 793 | (declare (fixnum start))
|
751 | 794 | (let ((save-header string))
|
... | ... | @@ -769,7 +812,7 @@ |
769 | 812 | save-header))
|
770 | 813 | |
771 | 814 | (defun nstring-downcase (string &key (start 0) end)
|
772 | - "Given a string, returns that string with all upper case alphabetic
|
|
815 | + _N"Given a string, returns that string with all upper case alphabetic
|
|
773 | 816 | characters converted to lowercase."
|
774 | 817 | (declare (fixnum start))
|
775 | 818 | (let ((save-header string))
|
... | ... | @@ -792,6 +835,7 @@ |
792 | 835 | (setf (schar string (incf index)) lo))))))
|
793 | 836 | save-header))
|
794 | 837 | |
838 | +#+nil
|
|
795 | 839 | (defun nstring-capitalize (string &key (start 0) end)
|
796 | 840 | "Given a string, returns that string with the first
|
797 | 841 | character of each ``word'' converted to upper-case, and remaining
|
... | ... | @@ -817,6 +861,31 @@ |
817 | 861 | (setf (schar string index) (char-downcase char))))))
|
818 | 862 | save-header))
|
819 | 863 | |
864 | +(defun nstring-capitalize (string &key (start 0) end)
|
|
865 | + _N"Given a string, returns that string with the first
|
|
866 | + character of each ``word'' converted to upper-case, and remaining
|
|
867 | + chars in the word converted to lower case. A ``word'' is defined
|
|
868 | + to be a string of case-modifiable characters delimited by
|
|
869 | + non-case-modifiable chars."
|
|
870 | + (declare (fixnum start))
|
|
871 | + (let ((save-header string))
|
|
872 | + (with-one-string string start end offset
|
|
873 | + (do ((index start (1+ index))
|
|
874 | + (newword t)
|
|
875 | + (char ()))
|
|
876 | + ((= index (the fixnum end)))
|
|
877 | + (declare (fixnum index))
|
|
878 | + (setq char (schar string index))
|
|
879 | + (cond ((not (alphanumericp char))
|
|
880 | + (setq newword t))
|
|
881 | + (newword
|
|
882 | + ;;char is first case-modifiable after non-case-modifiable
|
|
883 | + (setf (schar string index) (char-upcase char))
|
|
884 | + (setq newword ()))
|
|
885 | + (t
|
|
886 | + (setf (schar string index) (char-downcase char))))))
|
|
887 | + save-header))
|
|
888 | + |
|
820 | 889 | |
821 | 890 | #+unicode
|
822 | 891 | (progn
|
... | ... | @@ -441,6 +441,48 @@ |
441 | 441 | :end next)))
|
442 | 442 | (write-string string result :start end :end offset-slen))))))
|
443 | 443 | |
444 | +(defun string-capitalize-simple (string &key (start 0) end)
|
|
445 | + _N"Given a string, returns a copy of the string with the first
|
|
446 | + character of each ``word'' converted to upper-case, and remaining
|
|
447 | + chars in the word converted to lower case. A ``word'' is defined
|
|
448 | + to be a string of case-modifiable characters delimited by
|
|
449 | + non-case-modifiable chars."
|
|
450 | + (declare (fixnum start))
|
|
451 | + (let* ((string (if (stringp string) string (string string)))
|
|
452 | + (slen (length string)))
|
|
453 | + (declare (fixnum slen))
|
|
454 | + (with-one-string string start end offset
|
|
455 | + (let ((offset-slen (+ slen offset))
|
|
456 | + (newstring (make-string slen)))
|
|
457 | + (declare (fixnum offset-slen))
|
|
458 | + (do ((index offset (1+ index))
|
|
459 | + (new-index 0 (1+ new-index)))
|
|
460 | + ((= index start))
|
|
461 | + (declare (fixnum index new-index))
|
|
462 | + (setf (schar newstring new-index) (schar string index)))
|
|
463 | + (do ((index start (1+ index))
|
|
464 | + (new-index (- start offset) (1+ new-index))
|
|
465 | + (newword t)
|
|
466 | + (char ()))
|
|
467 | + ((= index (the fixnum end)))
|
|
468 | + (declare (fixnum index new-index))
|
|
469 | + (setq char (schar string index))
|
|
470 | + (cond ((not (alphanumericp char))
|
|
471 | + (setq newword t))
|
|
472 | + (newword
|
|
473 | + ;;char is first case-modifiable after non-case-modifiable
|
|
474 | + (setq char (char-titlecase char))
|
|
475 | + (setq newword ()))
|
|
476 | + ;;char is case-modifiable, but not first
|
|
477 | + (t (setq char (char-downcase char))))
|
|
478 | + (setf (schar newstring new-index) char))
|
|
479 | + (do ((index end (1+ index))
|
|
480 | + (new-index (- (the fixnum end) offset) (1+ new-index)))
|
|
481 | + ((= index offset-slen))
|
|
482 | + (declare (fixnum index new-index))
|
|
483 | + (setf (schar newstring new-index) (schar string index)))
|
|
484 | + newstring))))
|
|
485 | + |
|
444 | 486 | (defun string-capitalize-full (string &key (start 0) end (casing :full))
|
445 | 487 | "Capitalize String using the Common Lisp word-break algorithm to find
|
446 | 488 | the words in String. The beginning is capitalized depending on the
|
... | ... | @@ -515,7 +557,7 @@ |
515 | 557 | (if unicode-word-break
|
516 | 558 | (string-capitalize-unicode string :start start :end end :casing casing)
|
517 | 559 | (if (eq casing :simple)
|
518 | - (cl:string-capitalize string :start start :end end)
|
|
560 | + (string-capitalize-simple string :start start :end end)
|
|
519 | 561 | (string-capitalize-full string :start start :end end :casing casing))))
|
520 | 562 | |
521 | 563 |
... | ... | @@ -3892,7 +3892,7 @@ msgid "" |
3892 | 3892 | " upper case alphabetic characters converted to lowercase."
|
3893 | 3893 | msgstr ""
|
3894 | 3894 | |
3895 | -#: src/code/string.lisp
|
|
3895 | +#: src/code/unicode.lisp src/code/string.lisp
|
|
3896 | 3896 | msgid ""
|
3897 | 3897 | "Given a string, returns a copy of the string with the first\n"
|
3898 | 3898 | " character of each ``word'' converted to upper-case, and remaining\n"
|
... | ... | @@ -59,3 +59,35 @@ |
59 | 59 | when (char/= actual (char-downcase expected))
|
60 | 60 | collect (list (char-downcase expected)
|
61 | 61 | (char-code actual))))))
|
62 | + |
|
63 | +(define-test string-capitalize
|
|
64 | + (:tag :issues)
|
|
65 | + (let* ((s
|
|
66 | + ;; This string contains a couple of characters where
|
|
67 | + ;; Unicode has a titlecase version of the character. We
|
|
68 | + ;; want to make sure we use char-upcase to capitalize the
|
|
69 | + ;; string instead of lisp::char-titlecse
|
|
70 | + (coerce
|
|
71 | + '(#\Latin_Small_Letter_Dz
|
|
72 | + #\a #\b
|
|
73 | + #\space
|
|
74 | + #\Latin_Small_Letter_Lj
|
|
75 | + #\A #\B)
|
|
76 | + 'string))
|
|
77 | + (expected
|
|
78 | + ;; Manually convert S to a capitalized string using
|
|
79 | + ;; char-upcase/downcase.
|
|
80 | + (map 'string
|
|
81 | + #'(lambda (ch f)
|
|
82 | + (funcall f ch))
|
|
83 | + s
|
|
84 | + (list #'char-upcase
|
|
85 | + #'char-downcase
|
|
86 | + #'char-downcase
|
|
87 | + #'char-downcase
|
|
88 | + #'char-upcase
|
|
89 | + #'char-downcase
|
|
90 | + #'char-downcase))))
|
|
91 | + (assert-equal
|
|
92 | + (map 'list #'char-name expected)
|
|
93 | + (map 'list #'char-name (string-capitalize s))))) |