Raymond Toy pushed to branch master at cmucl / cmucl

Commits:

4 changed files:

Changes:

  • src/code/char.lisp
    ... ... @@ -31,7 +31,6 @@
    31 31
     	  alphanumericp char= char/= char< char> char<= char>= char-equal
    
    32 32
     	  char-not-equal char-lessp char-greaterp char-not-greaterp
    
    33 33
     	  char-not-lessp character char-code code-char char-upcase
    
    34
    -	  char-titlecase title-case-p
    
    35 34
     	  char-downcase digit-char char-int char-name name-char
    
    36 35
     	  codepoint-limit codepoint))
    
    37 36
     
    
    ... ... @@ -259,7 +258,7 @@
    259 258
       (declare (character char))
    
    260 259
       (and (typep char 'base-char)
    
    261 260
            (let ((m (char-code (the base-char char))))
    
    262
    -	 (or (< 31 m 127)
    
    261
    +	 (or (<= (char-code #\space ) m (char-code #\~))
    
    263 262
     	     #+(and unicode (not unicode-bootstrap))
    
    264 263
     	     (and (> m +ascii-limit+)
    
    265 264
     		  (>= (unicode-category m) +unicode-category-graphic+))))))
    
    ... ... @@ -270,7 +269,8 @@
    270 269
       argument is an alphabetic character; otherwise NIL."
    
    271 270
       (declare (character char))
    
    272 271
       (let ((m (char-code char)))
    
    273
    -    (or (< 64 m 91) (< 96 m 123)
    
    272
    +    (or (<= (char-code #\A) m (char-code #\Z))
    
    273
    +        (<= (char-code #\a) m (char-code #\z))
    
    274 274
     	#+(and unicode (not unicode-bootstrap))
    
    275 275
     	(and (> m +ascii-limit+)
    
    276 276
     	     (<= +unicode-category-letter+ (unicode-category m)
    
    ... ... @@ -282,7 +282,7 @@
    282 282
       argument is an upper-case character, NIL otherwise."
    
    283 283
       (declare (character char))
    
    284 284
       (let ((m (char-code char)))
    
    285
    -    (or (< 64 m 91)
    
    285
    +    (or (<= (char-code #\A) m (char-code #\Z))
    
    286 286
     	#+(and unicode (not unicode-bootstrap))
    
    287 287
     	(and (> m +ascii-limit+)
    
    288 288
                  (not (zerop (ldb +lower-case-entry+ (case-mapping-entry m))))))))
    
    ... ... @@ -293,29 +293,19 @@
    293 293
       argument is a lower-case character, NIL otherwise."
    
    294 294
       (declare (character char))
    
    295 295
       (let ((m (char-code char)))
    
    296
    -    (or (< 96 m 123)
    
    296
    +    (or (<= (char-code #\a) m (char-code #\z))
    
    297 297
     	#+(and unicode (not unicode-bootstrap))
    
    298 298
     	(and (> m +ascii-limit+)
    
    299 299
                  (not (zerop (ldb +upper-case-entry+ (case-mapping-entry m))))))))
    
    300 300
     
    
    301
    -(defun title-case-p (char)
    
    302
    -  "The argument must be a character object; title-case-p returns T if the
    
    303
    -  argument is a title-case character, NIL otherwise."
    
    304
    -  (declare (character char))
    
    305
    -  (let ((m (char-code char)))
    
    306
    -    (or (< 64 m 91)
    
    307
    -	#+(and unicode (not unicode-bootstrap))
    
    308
    -	(and (> m +ascii-limit+)
    
    309
    -	     (= (unicode-category m) +unicode-category-title+)))))
    
    310
    -
    
    311
    -
    
    312 301
     (defun both-case-p (char)
    
    313 302
       "The argument must be a character object.  Both-case-p returns T if the
    
    314 303
       argument is an alphabetic character and if the character exists in
    
    315 304
       both upper and lower case.  For ASCII, this is the same as Alpha-char-p."
    
    316 305
       (declare (character char))
    
    317 306
       (let ((m (char-code char)))
    
    318
    -    (or (< 64 m 91) (< 96 m 123)
    
    307
    +    (or (<= (char-code #\A) m (char-code #\Z))
    
    308
    +        (<= (char-code #\a) m (char-code #\z))
    
    319 309
     	#+(and unicode (not unicode-bootstrap))
    
    320 310
     	(and (> m +ascii-limit+)
    
    321 311
                  (not (zerop (case-mapping-entry m)))))))
    
    ... ... @@ -347,7 +337,9 @@
    347 337
       (declare (character char))
    
    348 338
       (let ((m (char-code char)))
    
    349 339
         ;; Shortcut for ASCII digits and upper and lower case ASCII letters
    
    350
    -    (or (< 47 m 58) (< 64 m 91) (< 96 m 123)
    
    340
    +    (or (<= (char-code #\0) m (char-code #\9))
    
    341
    +        (<= (char-code #\A) m (char-code #\Z))
    
    342
    +        (<= (char-code #\a) m (char-code #\z))
    
    351 343
     	#+(and unicode (not unicode-bootstrap))
    
    352 344
     	(and (> m +ascii-limit+)
    
    353 345
     	     (<= +unicode-category-letter+ (unicode-category m)
    
    ... ... @@ -418,14 +410,14 @@
    418 410
     
    
    419 411
     (defmacro equal-char-code (character)
    
    420 412
       `(let ((ch (char-code ,character)))
    
    421
    -     ;; Handle ASCII separately for bootstrapping and for unidata missing.
    
    422
    -     (if (< 64 ch 91)
    
    423
    -	 (+ ch 32)
    
    424
    -	 #-(and unicode (not unicode-bootstrap))
    
    425
    -	 ch
    
    426
    -	 #+(and unicode (not unicode-bootstrap))
    
    427
    -	 (if (> ch +ascii-limit+) (unicode-lower ch) ch))))
    
    428
    -
    
    413
    +     ;; Handle ASCII separately for bootstrapping.
    
    414
    +     (cond ((<= (char-code #\A) ch (char-code #\Z))
    
    415
    +            (logxor ch #x20))
    
    416
    +	   #+(and unicode (not unicode-bootstrap))
    
    417
    +           ((> ch +ascii-limit+)
    
    418
    +            (case-mapping-lower-case ch))
    
    419
    +           (t
    
    420
    +            ch))))
    
    429 421
     
    
    430 422
     (defun char-equal (character &rest more-characters)
    
    431 423
       "Returns T if all of its arguments are the same character.
    
    ... ... @@ -504,19 +496,6 @@
    504 496
       (declare (character char))
    
    505 497
       (char-upcase char))
    
    506 498
     
    
    507
    -(defun char-titlecase (char)
    
    508
    -  "Returns CHAR converted to title-case if that is possible."
    
    509
    -  (declare (character char))
    
    510
    -  #-(and unicode (not unicode-bootstrap))
    
    511
    -  (if (lower-case-p char)
    
    512
    -      (code-char (- (char-code char) 32))
    
    513
    -      char)
    
    514
    -  #+(and unicode (not unicode-bootstrap))
    
    515
    -  (let ((m (char-code char)))
    
    516
    -    (cond ((> m +ascii-limit+) (code-char (unicode-title m)))
    
    517
    -	  ((< 96 m 123) (code-char (- m 32)))
    
    518
    -	  (t char))))
    
    519
    -
    
    520 499
     (defun char-downcase (char)
    
    521 500
       "Returns CHAR converted to lower-case if that is possible."
    
    522 501
       (declare (character char))
    

  • src/code/unicode.lisp
    ... ... @@ -488,6 +488,26 @@
    488 488
     	       (declare (ignore c))
    
    489 489
     	       (lookup (+ i (if (eql widep 1) 2 1)) (left-context i))))))))
    
    490 490
     
    
    491
    +(defun char-titlecase (char)
    
    492
    +  "Returns CHAR converted to title-case if that is possible."
    
    493
    +  (declare (character char))
    
    494
    +  (let ((m (char-code char)))
    
    495
    +    (cond ((<= (char-code #\a) m (char-code #\z))
    
    496
    +           (code-char (logxor m #x20)))
    
    497
    +          #+(and unicode (not unicode-bootstrap))
    
    498
    +	  ((> m +ascii-limit+) (code-char (unicode-title m)))
    
    499
    +	  (t char))))
    
    500
    +
    
    501
    +(defun title-case-p (char)
    
    502
    +  "The argument must be a character object; title-case-p returns T if the
    
    503
    +  argument is a title-case character, NIL otherwise."
    
    504
    +  (declare (character char))
    
    505
    +  (let ((m (char-code char)))
    
    506
    +    (or (<= (code-char #\A) m (code-char #\Z))
    
    507
    +	#+(and unicode (not unicode-bootstrap))
    
    508
    +	(and (> m +ascii-limit+)
    
    509
    +	     (= (unicode-category m) +unicode-category-title+)))))
    
    510
    +
    
    491 511
     (defun string-capitalize-unicode (string &key (start 0) end (casing :simple))
    
    492 512
       "Capitalize String using the Unicode word-break algorithm to find
    
    493 513
       the words in String.  The beginning is capitalized depending on the
    

  • src/compiler/srctran.lisp
    ... ... @@ -3343,7 +3343,8 @@
    3343 3343
            x)
    
    3344 3344
       #+(and unicode (not unicode-bootstrap))
    
    3345 3345
       '(let ((m (char-code x)))
    
    3346
    -    (cond ((< 96 m 123) (code-char (- m 32)))
    
    3346
    +    (cond ((<= (char-code #\a) m (char-code #\z))
    
    3347
    +           (code-char (logxor m #x20)))
    
    3347 3348
               ((> m lisp::+ascii-limit+)
    
    3348 3349
                (code-char (lisp::case-mapping-upper-case m)))
    
    3349 3350
     	   (t x))))
    
    ... ... @@ -3356,7 +3357,8 @@
    3356 3357
            x)
    
    3357 3358
       #+(and unicode (not unicode-bootstrap))
    
    3358 3359
       '(let ((m (char-code x)))
    
    3359
    -    (cond ((< 64 m 91) (code-char (+ m 32)))
    
    3360
    +    (cond ((<= (char-code #\A) m (char-code #\Z))
    
    3361
    +           (code-char (logxor m #x20)))
    
    3360 3362
               ((> m lisp::+ascii-limit+)
    
    3361 3363
                (code-char (lisp::case-mapping-lower-case m)))
    
    3362 3364
     	  (t x))))
    

  • tests/char.lisp
    1
    +;; Tests of char functions
    
    2
    +
    
    3
    +(defpackage :char-tests
    
    4
    +  (:use :cl :lisp-unit))
    
    5
    +
    
    6
    +(in-package "CHAR-TESTS")
    
    7
    +
    
    8
    +(define-test char-equal
    
    9
    +  (:tag :issues)
    
    10
    +  (let ((test-codes
    
    11
    +          ;; Find all the codes where the CL lower case character
    
    12
    +          ;; doesn't match the Unicode lower case character.
    
    13
    +          (loop for code from 128 below char-code-limit
    
    14
    +               for ch = (code-char code)
    
    15
    +               when (/= (char-code (char-downcase ch)) (or (lisp::unicode-lower code) code))
    
    16
    +                 collect code)))
    
    17
    +    (dolist (code test-codes)
    
    18
    +      ;; Verify that we convert to the CL lower case character instead
    
    19
    +      ;; of the Unicode lower case character for the cases where these
    
    20
    +      ;; are different.
    
    21
    +      (assert-false (char-equal (code-char (lisp::unicode-lower code))
    
    22
    +                                (code-char code))
    
    23
    +                    code
    
    24
    +                    (lisp::unicode-lower code)))))