GitLab

at 2024-05-17T07:54:35-07:00

Clean up comments and code

Add some better comments and remove some old commented-out code.

Make nstring casing consistent with char casing.

Use char-upcase/downcase to compute the desired case for
nstring-upcase/downcase.  Add tests for this as well.

Update pot file for updated docstring

 (deftype codepoint ()

   `(integer 0 (,codepoint-limit)))

-;; This MUST be greater than or equal to 127!

+;; This MUST be greater than or equal to 127!  Be very careful about

+;; changing this.  It affects all kinds of casing issues.

 (defconstant +unicode-lower-limit+

   191

-  "A character code strictly larger than this is handled using Unicode rules.")

+  "A character code strictly larger than this is handled using Unicode

+  rules.")

 (macrolet ((frob (char-names-list)

 	  (declare (fixnum index new-index))

 	  (multiple-value-bind (code wide) (codepoint string index)

 	    (when wide (incf index))

-	    ;; Handle ASCII specially because this is called early in

-	    ;; initialization, before unidata is available.

-            #+nil

-	    (cond ((< 96 code 123) (decf code 32))

-		  #+unicode

-		  ((> code 127) (setq code (unicode-upper code))))

+            ;; Use char-upcase if it's not a surrogate pair so that

+            ;; we're always consist.

             (if wide

                 (setq code (unicode-upper code))

                 (setf code (char-code (char-upcase (code-char code)))))

 	  (declare (fixnum index new-index))

 	  (multiple-value-bind (code wide) (codepoint string index)

 	    (when wide (incf index))

-	    ;; Handle ASCII specially because this is called early in

-	    ;; initialization, before unidata is available.

-            #+nil

-	    (cond ((< 64 code 91) (incf code 32))

-		  ((> code 127) (setq code (unicode-lower code))))

+            ;; Use char-downcase if it's not a surrogate pair so that

+            ;; we're always consist.

             (if wide

                 (setq code (unicode-lower code))

                 (setq code (char-code (char-downcase (code-char code)))))

 	  ((= index (the fixnum end)))

 	(declare (fixnum index))

 	(multiple-value-bind (code wide) (codepoint string index)

-	  (declare (ignore wide))

-	  ;; Handle ASCII specially because this is called early in

-	  ;; initialization, before unidata is available.

-	  (cond ((< 96 code 123) (decf code 32))

-		#+unicode

-		((> code 127) (setq code (unicode-upper code))))

+          (if wide

+              (setq code (unicode-upper code))

+              (setf code (char-code (char-upcase (code-char code)))))

 	  ;;@@ WARNING: this may, in theory, need to extend string

 	  ;;      (which, obviously, we can't do here.  Unless

 	  ;;       STRING is adjustable, maybe)

 	  ((= index (the fixnum end)))

 	(declare (fixnum index))

 	(multiple-value-bind (code wide) (codepoint string index)

-	  (declare (ignore wide))

-	  (cond ((< 64 code 91) (incf code 32))

-		#+unicode

-		((> code 127) (setq code (unicode-lower code))))

+          (if wide

+              (setq code (unicode-lower code))

+              (setf code (char-code (char-downcase (code-char code)))))

 	  ;;@@ WARNING: this may, in theory, need to extend string

 	  ;;      (which, obviously, we can't do here.  Unless

 	  ;;       STRING is adjustable, maybe)

 #: src/code/char.lisp

 msgid ""

-"A character code strictly larger than this is handled using Unicode rules."

+"A character code strictly larger than this is handled using Unicode\n"

+"  rules."

 msgstr ""

 #: src/code/char.lisp

      (loop for expected across s

            and actual across s-upcase

            when (char/= actual (char-upcase expected))

-             collect (list (char-upcase (char-code expected))

+             collect (list (char-upcase expected)

                            (char-code actual))))))

 (define-test string-downcase

      (loop for expected across s

            and actual across s-downcase

            when (char/= actual (char-downcase expected))

-             collect (list (char-downcase (char-code expected))

+             collect (list (char-downcase expected)

+                           (char-code actual))))))

+

+(define-test nstring-upcase

+    (:tag :issues)

+  (let* ((s (make-test-string))

+         (ns (make-test-string))

+         (ns-upcase (nstring-upcase ns)))

+    (assert-false

+     (loop for expected across s

+           and actual across ns

+           when (char/= actual (char-upcase expected))

+             collect (list (char-upcase expected)

+                           (char-code actual))))))

+

+(define-test nstring-downcase

+    (:tag :issues)

+  (let* ((s (make-test-string))

+         (ns (make-test-string))

+         (ns-downcase (nstring-downcase ns)))

+    (assert-false

+     (loop for expected across s

+           and actual across ns

+           when (char/= actual (char-downcase expected))

+             collect (list (char-downcase expected)

                            (char-code actual))))))

Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl

Commits:

4 changed files:

Changes: