Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
-
5a037b19
by Raymond Toy at 2024-05-31T08:56:12-07:00
4 changed files:
Changes:
| ... | ... | @@ -702,6 +702,7 @@ |
| 702 | 702 | (setf (schar newstring new-index) (schar string index)))
|
| 703 | 703 | newstring))))
|
| 704 | 704 | |
| 705 | +#+nil
|
|
| 705 | 706 | (defun string-capitalize (string &key (start 0) end)
|
| 706 | 707 | _N"Given a string, returns a copy of the string with the first
|
| 707 | 708 | character of each ``word'' converted to upper-case, and remaining
|
| ... | ... | @@ -744,8 +745,50 @@ |
| 744 | 745 | (setf (schar newstring new-index) (schar string index)))
|
| 745 | 746 | newstring))))
|
| 746 | 747 | |
| 748 | +(defun string-capitalize (string &key (start 0) end)
|
|
| 749 | + _N"Given a string, returns a copy of the string with the first
|
|
| 750 | + character of each ``word'' converted to upper-case, and remaining
|
|
| 751 | + chars in the word converted to lower case. A ``word'' is defined
|
|
| 752 | + to be a string of case-modifiable characters delimited by
|
|
| 753 | + non-case-modifiable chars."
|
|
| 754 | + (declare (fixnum start))
|
|
| 755 | + (let* ((string (if (stringp string) string (string string)))
|
|
| 756 | + (slen (length string)))
|
|
| 757 | + (declare (fixnum slen))
|
|
| 758 | + (with-one-string string start end offset
|
|
| 759 | + (let ((offset-slen (+ slen offset))
|
|
| 760 | + (newstring (make-string slen)))
|
|
| 761 | + (declare (fixnum offset-slen))
|
|
| 762 | + (do ((index offset (1+ index))
|
|
| 763 | + (new-index 0 (1+ new-index)))
|
|
| 764 | + ((= index start))
|
|
| 765 | + (declare (fixnum index new-index))
|
|
| 766 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 767 | + (do ((index start (1+ index))
|
|
| 768 | + (new-index (- start offset) (1+ new-index))
|
|
| 769 | + (newword t)
|
|
| 770 | + (char ()))
|
|
| 771 | + ((= index (the fixnum end)))
|
|
| 772 | + (declare (fixnum index new-index))
|
|
| 773 | + (setq char (schar string index))
|
|
| 774 | + (cond ((not (alphanumericp char))
|
|
| 775 | + (setq newword t))
|
|
| 776 | + (newword
|
|
| 777 | + ;;char is first case-modifiable after non-case-modifiable
|
|
| 778 | + (setq char (char-upcase char))
|
|
| 779 | + (setq newword ()))
|
|
| 780 | + ;;char is case-modifiable, but not first
|
|
| 781 | + (t (setq char (char-downcase char))))
|
|
| 782 | + (setf (schar newstring new-index) char))
|
|
| 783 | + (do ((index end (1+ index))
|
|
| 784 | + (new-index (- (the fixnum end) offset) (1+ new-index)))
|
|
| 785 | + ((= index offset-slen))
|
|
| 786 | + (declare (fixnum index new-index))
|
|
| 787 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 788 | + newstring))))
|
|
| 789 | + |
|
| 747 | 790 | (defun nstring-upcase (string &key (start 0) end)
|
| 748 | - "Given a string, returns that string with all lower case alphabetic
|
|
| 791 | + _N"Given a string, returns that string with all lower case alphabetic
|
|
| 749 | 792 | characters converted to uppercase."
|
| 750 | 793 | (declare (fixnum start))
|
| 751 | 794 | (let ((save-header string))
|
| ... | ... | @@ -769,7 +812,7 @@ |
| 769 | 812 | save-header))
|
| 770 | 813 | |
| 771 | 814 | (defun nstring-downcase (string &key (start 0) end)
|
| 772 | - "Given a string, returns that string with all upper case alphabetic
|
|
| 815 | + _N"Given a string, returns that string with all upper case alphabetic
|
|
| 773 | 816 | characters converted to lowercase."
|
| 774 | 817 | (declare (fixnum start))
|
| 775 | 818 | (let ((save-header string))
|
| ... | ... | @@ -792,6 +835,7 @@ |
| 792 | 835 | (setf (schar string (incf index)) lo))))))
|
| 793 | 836 | save-header))
|
| 794 | 837 | |
| 838 | +#+nil
|
|
| 795 | 839 | (defun nstring-capitalize (string &key (start 0) end)
|
| 796 | 840 | "Given a string, returns that string with the first
|
| 797 | 841 | character of each ``word'' converted to upper-case, and remaining
|
| ... | ... | @@ -817,6 +861,31 @@ |
| 817 | 861 | (setf (schar string index) (char-downcase char))))))
|
| 818 | 862 | save-header))
|
| 819 | 863 | |
| 864 | +(defun nstring-capitalize (string &key (start 0) end)
|
|
| 865 | + _N"Given a string, returns that string with the first
|
|
| 866 | + character of each ``word'' converted to upper-case, and remaining
|
|
| 867 | + chars in the word converted to lower case. A ``word'' is defined
|
|
| 868 | + to be a string of case-modifiable characters delimited by
|
|
| 869 | + non-case-modifiable chars."
|
|
| 870 | + (declare (fixnum start))
|
|
| 871 | + (let ((save-header string))
|
|
| 872 | + (with-one-string string start end offset
|
|
| 873 | + (do ((index start (1+ index))
|
|
| 874 | + (newword t)
|
|
| 875 | + (char ()))
|
|
| 876 | + ((= index (the fixnum end)))
|
|
| 877 | + (declare (fixnum index))
|
|
| 878 | + (setq char (schar string index))
|
|
| 879 | + (cond ((not (alphanumericp char))
|
|
| 880 | + (setq newword t))
|
|
| 881 | + (newword
|
|
| 882 | + ;;char is first case-modifiable after non-case-modifiable
|
|
| 883 | + (setf (schar string index) (char-upcase char))
|
|
| 884 | + (setq newword ()))
|
|
| 885 | + (t
|
|
| 886 | + (setf (schar string index) (char-downcase char))))))
|
|
| 887 | + save-header))
|
|
| 888 | + |
|
| 820 | 889 | |
| 821 | 890 | #+unicode
|
| 822 | 891 | (progn
|
| ... | ... | @@ -441,6 +441,48 @@ |
| 441 | 441 | :end next)))
|
| 442 | 442 | (write-string string result :start end :end offset-slen))))))
|
| 443 | 443 | |
| 444 | +(defun string-capitalize-simple (string &key (start 0) end)
|
|
| 445 | + _N"Given a string, returns a copy of the string with the first
|
|
| 446 | + character of each ``word'' converted to upper-case, and remaining
|
|
| 447 | + chars in the word converted to lower case. A ``word'' is defined
|
|
| 448 | + to be a string of case-modifiable characters delimited by
|
|
| 449 | + non-case-modifiable chars."
|
|
| 450 | + (declare (fixnum start))
|
|
| 451 | + (let* ((string (if (stringp string) string (string string)))
|
|
| 452 | + (slen (length string)))
|
|
| 453 | + (declare (fixnum slen))
|
|
| 454 | + (with-one-string string start end offset
|
|
| 455 | + (let ((offset-slen (+ slen offset))
|
|
| 456 | + (newstring (make-string slen)))
|
|
| 457 | + (declare (fixnum offset-slen))
|
|
| 458 | + (do ((index offset (1+ index))
|
|
| 459 | + (new-index 0 (1+ new-index)))
|
|
| 460 | + ((= index start))
|
|
| 461 | + (declare (fixnum index new-index))
|
|
| 462 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 463 | + (do ((index start (1+ index))
|
|
| 464 | + (new-index (- start offset) (1+ new-index))
|
|
| 465 | + (newword t)
|
|
| 466 | + (char ()))
|
|
| 467 | + ((= index (the fixnum end)))
|
|
| 468 | + (declare (fixnum index new-index))
|
|
| 469 | + (setq char (schar string index))
|
|
| 470 | + (cond ((not (alphanumericp char))
|
|
| 471 | + (setq newword t))
|
|
| 472 | + (newword
|
|
| 473 | + ;;char is first case-modifiable after non-case-modifiable
|
|
| 474 | + (setq char (char-titlecase char))
|
|
| 475 | + (setq newword ()))
|
|
| 476 | + ;;char is case-modifiable, but not first
|
|
| 477 | + (t (setq char (char-downcase char))))
|
|
| 478 | + (setf (schar newstring new-index) char))
|
|
| 479 | + (do ((index end (1+ index))
|
|
| 480 | + (new-index (- (the fixnum end) offset) (1+ new-index)))
|
|
| 481 | + ((= index offset-slen))
|
|
| 482 | + (declare (fixnum index new-index))
|
|
| 483 | + (setf (schar newstring new-index) (schar string index)))
|
|
| 484 | + newstring))))
|
|
| 485 | + |
|
| 444 | 486 | (defun string-capitalize-full (string &key (start 0) end (casing :full))
|
| 445 | 487 | "Capitalize String using the Common Lisp word-break algorithm to find
|
| 446 | 488 | the words in String. The beginning is capitalized depending on the
|
| ... | ... | @@ -515,7 +557,7 @@ |
| 515 | 557 | (if unicode-word-break
|
| 516 | 558 | (string-capitalize-unicode string :start start :end end :casing casing)
|
| 517 | 559 | (if (eq casing :simple)
|
| 518 | - (cl:string-capitalize string :start start :end end)
|
|
| 560 | + (string-capitalize-simple string :start start :end end)
|
|
| 519 | 561 | (string-capitalize-full string :start start :end end :casing casing))))
|
| 520 | 562 | |
| 521 | 563 |
| ... | ... | @@ -3892,7 +3892,7 @@ msgid "" |
| 3892 | 3892 | " upper case alphabetic characters converted to lowercase."
|
| 3893 | 3893 | msgstr ""
|
| 3894 | 3894 | |
| 3895 | -#: src/code/string.lisp
|
|
| 3895 | +#: src/code/unicode.lisp src/code/string.lisp
|
|
| 3896 | 3896 | msgid ""
|
| 3897 | 3897 | "Given a string, returns a copy of the string with the first\n"
|
| 3898 | 3898 | " character of each ``word'' converted to upper-case, and remaining\n"
|
| ... | ... | @@ -59,3 +59,35 @@ |
| 59 | 59 | when (char/= actual (char-downcase expected))
|
| 60 | 60 | collect (list (char-downcase expected)
|
| 61 | 61 | (char-code actual))))))
|
| 62 | + |
|
| 63 | +(define-test string-capitalize
|
|
| 64 | + (:tag :issues)
|
|
| 65 | + (let* ((s
|
|
| 66 | + ;; This string contains a couple of characters where
|
|
| 67 | + ;; Unicode has a titlecase version of the character. We
|
|
| 68 | + ;; want to make sure we use char-upcase to capitalize the
|
|
| 69 | + ;; string instead of lisp::char-titlecse
|
|
| 70 | + (coerce
|
|
| 71 | + '(#\Latin_Small_Letter_Dz
|
|
| 72 | + #\a #\b
|
|
| 73 | + #\space
|
|
| 74 | + #\Latin_Small_Letter_Lj
|
|
| 75 | + #\A #\B)
|
|
| 76 | + 'string))
|
|
| 77 | + (expected
|
|
| 78 | + ;; Manually convert S to a capitalized string using
|
|
| 79 | + ;; char-upcase/downcase.
|
|
| 80 | + (map 'string
|
|
| 81 | + #'(lambda (ch f)
|
|
| 82 | + (funcall f ch))
|
|
| 83 | + s
|
|
| 84 | + (list #'char-upcase
|
|
| 85 | + #'char-downcase
|
|
| 86 | + #'char-downcase
|
|
| 87 | + #'char-downcase
|
|
| 88 | + #'char-upcase
|
|
| 89 | + #'char-downcase
|
|
| 90 | + #'char-downcase))))
|
|
| 91 | + (assert-equal
|
|
| 92 | + (map 'list #'char-name expected)
|
|
| 93 | + (map 'list #'char-name (string-capitalize s))))) |