Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl

Commits:

3 changed files:

Changes:

  • src/code/char.lisp
    ... ... @@ -215,7 +215,8 @@
    215 215
            (let ((m (char-code (the base-char char))))
    
    216 216
     	 (or (< 31 m 127)
    
    217 217
     	     #+(and unicode (not unicode-bootstrap))
    
    218
    -	     (and (> m +unicode-lower-limit+)
    
    218
    +	     (and (/= m 181)
    
    219
    +                  (> m +unicode-lower-limit+)
    
    219 220
     		  (>= (unicode-category m) +unicode-category-graphic+))))))
    
    220 221
     
    
    221 222
     
    
    ... ... @@ -272,6 +273,11 @@
    272 273
         (or (< 64 m 91) (< 96 m 123)
    
    273 274
     	#+(and unicode (not unicode-bootstrap))
    
    274 275
     	(and (> m +unicode-lower-limit+)
    
    276
    +             ;; Unicode says Micro_sign is a lower case letter, but
    
    277
    +             ;; for CL, we don't want it to be a lower case letter.
    
    278
    +             ;; This is for compatibility with other Lisp
    
    279
    +             ;; implementations.
    
    280
    +             (/= m 181)
    
    275 281
     	     (<= +unicode-category-upper+
    
    276 282
     		 (unicode-category m)
    
    277 283
     		 +unicode-category-lower+)))))
    
    ... ... @@ -464,8 +470,12 @@
    464 470
           char)
    
    465 471
       #+(and unicode (not unicode-bootstrap))
    
    466 472
       (let ((m (char-code char)))
    
    467
    -    (cond ((> m +unicode-lower-limit+) (code-char (unicode-upper m)))
    
    468
    -	  ((< 96 m 123) (code-char (- m 32)))
    
    473
    +    (cond ((< 96 m 123) (code-char (- m 32)))
    
    474
    +          ((= m 181) char)
    
    475
    +          ((> m +unicode-lower-limit+)
    
    476
    +           (if (member (unicode-category m) '(92 32 75 109))
    
    477
    +               char
    
    478
    +               (code-char (unicode-upper m))))
    
    469 479
     	  (t char))))
    
    470 480
     
    
    471 481
     (defun char-titlecase (char)
    
    ... ... @@ -490,7 +500,10 @@
    490 500
           char)
    
    491 501
       #+(and unicode (not unicode-bootstrap))
    
    492 502
       (let ((m (char-code char)))
    
    493
    -    (cond ((> m +unicode-lower-limit+) (code-char (unicode-lower m)))
    
    503
    +    (cond ((> m +unicode-lower-limit+)
    
    504
    +           (if (member (unicode-category m) '(92 75 109))
    
    505
    +               char
    
    506
    +               (code-char (unicode-lower m))))
    
    494 507
     	  ((< 64 m 91) (code-char (+ m 32)))
    
    495 508
     	  (t char))))
    
    496 509
     
    

  • src/code/unidata.lisp
    ... ... @@ -883,9 +883,7 @@
    883 883
       (unless (unidata-scase *unicode-data*) (load-scase))
    
    884 884
       (let* ((scase (unidata-scase *unicode-data*))
    
    885 885
     	 (n (logand (qref32 scase code) #xFF)))
    
    886
    -    (if (or (zerop n)
    
    887
    -            ;; Ignore category Lt, Mn, Nl, So
    
    888
    -            (member (unicode-category code) '(92 32 75 109)))
    
    886
    +    (if (zerop n)
    
    889 887
     	code
    
    890 888
     	(let* ((m (aref (scase-svec scase) (logand n #x7F))))
    
    891 889
     	  (if (logbitp 7 n) (+ code m) (- code m))))))
    
    ... ... @@ -896,9 +894,7 @@
    896 894
       (unless (unidata-scase *unicode-data*) (load-scase))
    
    897 895
       (let* ((scase (unidata-scase *unicode-data*))
    
    898 896
     	 (n (logand (ash (qref32 scase code) -8) #xFF)))
    
    899
    -    (if (or (zerop n)
    
    900
    -            ;; Ignore category Lt, Nl, So
    
    901
    -            (member (unicode-category code) '(92 75 109)))
    
    897
    +    (if (zerop n)
    
    902 898
     	code
    
    903 899
     	(let ((m (aref (scase-svec scase) (logand n #x7F))))
    
    904 900
     	  (if (logbitp 7 n) (+ code m) (- code m))))))
    

  • src/compiler/srctran.lisp
    ... ... @@ -3337,27 +3337,36 @@
    3337 3337
     
    
    3338 3338
     (deftransform char-upcase ((x) (base-char))
    
    3339 3339
       "open code"
    
    3340
    +  ;; NOTE:  This MUST match what the function char-upcase does.
    
    3340 3341
       #-(and unicode (not unicode-bootstrap))
    
    3341 3342
       '(if (lower-case-p x)
    
    3342 3343
            (code-char (- (char-code x) 32))
    
    3343 3344
            x)
    
    3344 3345
       #+(and unicode (not unicode-bootstrap))
    
    3345 3346
       '(let ((m (char-code x)))
    
    3346
    -     (cond ((> m 127) (code-char (lisp::unicode-upper m)))
    
    3347
    -	   ((< 96 m 123) (code-char (- m 32)))
    
    3347
    +    (cond ((< 96 m 123) (code-char (- m 32)))
    
    3348
    +          ((= m 181) x)
    
    3349
    +          ((> m lisp::+unicode-lower-limit+)
    
    3350
    +           (if (member (unicode-category m) '(92 32 75 109))
    
    3351
    +               x
    
    3352
    +               (code-char (lisp::unicode-upper m))))
    
    3348 3353
     	   (t x))))
    
    3349 3354
     
    
    3350 3355
     (deftransform char-downcase ((x) (base-char))
    
    3351 3356
       "open code"
    
    3357
    +  ;; NOTE:  This MUST match what the function char-downcase does.
    
    3352 3358
       #-(and unicode (not unicode-bootstrap))
    
    3353 3359
       '(if (upper-case-p x)
    
    3354 3360
            (code-char (+ (char-code x) 32))
    
    3355 3361
            x)
    
    3356 3362
       #+(and unicode (not unicode-bootstrap))
    
    3357 3363
       '(let ((m (char-code x)))
    
    3358
    -     (cond ((> m 127) (code-char (lisp::unicode-lower m)))
    
    3359
    -	   ((< 64 m 91) (code-char (+ m 32)))
    
    3360
    -	   (t x))))
    
    3364
    +    (cond ((> m lisp::+unicode-lower-limit+)
    
    3365
    +           (if (member (unicode-category m) '(92 75 109))
    
    3366
    +               x
    
    3367
    +               (code-char (lisp::unicode-lower m))))
    
    3368
    +	  ((< 64 m 91) (code-char (+ m 32)))
    
    3369
    +	  (t x))))
    
    3361 3370
     
    
    3362 3371
     
    
    3363 3372
     ;;;; Equality predicate transforms: