Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl
Commits:
-
2764c097
by Raymond Toy at 2024-05-29T08:13:52-07:00
-
0f411c4b
by Raymond Toy at 2024-05-29T08:16:03-07:00
-
5fb9f515
by Raymond Toy at 2024-05-29T08:17:56-07:00
-
2e4de525
by Raymond Toy at 2024-05-29T08:18:15-07:00
5 changed files:
- src/code/char.lisp
- src/compiler/srctran.lisp
- src/lisp/GNUmakefile
- src/lisp/case-table.c → src/lisp/case-mapping.c
- src/tools/create-case-table.lisp
Changes:
... | ... | @@ -68,8 +68,8 @@ |
68 | 68 | rules.")
|
69 | 69 | |
70 | 70 | ;; Table of mappings for upper case and lower case letters. See
|
71 | -;; src/lisp/case-table.c.
|
|
72 | -(alien:def-alien-variable "case_table"
|
|
71 | +;; src/lisp/case-mapping.c.
|
|
72 | +(alien:def-alien-variable "case_mapping"
|
|
73 | 73 | (alien:array (alien:* (alien:array c-call:unsigned-int 64)) 1024))
|
74 | 74 | |
75 | 75 | ;; Each entry in the case table consists of the code for either an
|
... | ... | @@ -81,9 +81,9 @@ |
81 | 81 | "Number of bits used for the index of the second stage table of the
|
82 | 82 | case mapping table.")
|
83 | 83 | |
84 | -(declaim (inline case-table-entry))
|
|
84 | +(declaim (inline case-mapping-entry))
|
|
85 | 85 | |
86 | -(defun case-table-entry (code)
|
|
86 | +(defun case-mapping-entry (code)
|
|
87 | 87 | "For the character code, CODE, return 0 or the 32-bit value from the
|
88 | 88 | case table. A value of 0 means there was no case mapping (neither
|
89 | 89 | upper nor lower case)."
|
... | ... | @@ -93,24 +93,24 @@ |
93 | 93 | code))
|
94 | 94 | (index2 (ldb (byte +stage2-size+ 0)
|
95 | 95 | code))
|
96 | - (stage2-sap (alien:alien-sap (alien:deref case-table index1))))
|
|
96 | + (stage2-sap (alien:alien-sap (alien:deref case-mapping index1))))
|
|
97 | 97 | (sys:sap-ref-32 stage2-sap (* 4 index2))))
|
98 | 98 | |
99 | -(declaim (inline case-table-lower-case))
|
|
100 | -(defun case-table-lower-case (code)
|
|
99 | +(declaim (inline case-mapping-lower-case))
|
|
100 | +(defun case-mapping-lower-case (code)
|
|
101 | 101 | "Compute the lower-case character code for the given character CODE.
|
102 | 102 | If no lower-case code exists, just return CODE."
|
103 | 103 | (declare (type (integer 0 (#.char-code-limit)) code)
|
104 | 104 | (optimize (speed 3)))
|
105 | - (ldb (byte 16 0) (- code (ldb +lower-case-entry+ (case-table-entry code)))))
|
|
105 | + (ldb (byte 16 0) (- code (ldb +lower-case-entry+ (case-mapping-entry code)))))
|
|
106 | 106 | |
107 | -(declaim (inline case-table-upper-case))
|
|
108 | -(defun case-table-upper-case (code)
|
|
107 | +(declaim (inline case-mapping-upper-case))
|
|
108 | +(defun case-mapping-upper-case (code)
|
|
109 | 109 | "Compute the upper-case character code for the given character CODE.
|
110 | 110 | If no upper-case code exists, just return CODE."
|
111 | 111 | (declare (type (integer 0 (#.char-code-limit)) code)
|
112 | 112 | (optimize (speed 3)))
|
113 | - (ldb (byte 16 0) (- code (ldb +upper-case-entry+ (case-table-entry code)))))
|
|
113 | + (ldb (byte 16 0) (- code (ldb +upper-case-entry+ (case-mapping-entry code)))))
|
|
114 | 114 | |
115 | 115 | (macrolet ((frob (char-names-list)
|
116 | 116 | (collect ((results))
|
... | ... | @@ -285,7 +285,7 @@ |
285 | 285 | (or (< 64 m 91)
|
286 | 286 | #+(and unicode (not unicode-bootstrap))
|
287 | 287 | (and (> m +unicode-lower-limit+)
|
288 | - (not (zerop (ldb +lower-case-entry+ (case-table-entry m))))))))
|
|
288 | + (not (zerop (ldb +lower-case-entry+ (case-mapping-entry m))))))))
|
|
289 | 289 | |
290 | 290 | |
291 | 291 | (defun lower-case-p (char)
|
... | ... | @@ -296,7 +296,7 @@ |
296 | 296 | (or (< 96 m 123)
|
297 | 297 | #+(and unicode (not unicode-bootstrap))
|
298 | 298 | (and (> m +unicode-lower-limit+)
|
299 | - (not (zerop (ldb +upper-case-entry+ (case-table-entry m))))))))
|
|
299 | + (not (zerop (ldb +upper-case-entry+ (case-mapping-entry m))))))))
|
|
300 | 300 | |
301 | 301 | (defun title-case-p (char)
|
302 | 302 | "The argument must be a character object; title-case-p returns T if the
|
... | ... | @@ -318,7 +318,7 @@ |
318 | 318 | (or (< 64 m 91) (< 96 m 123)
|
319 | 319 | #+(and unicode (not unicode-bootstrap))
|
320 | 320 | (and (> m +unicode-lower-limit+)
|
321 | - (not (zerop (case-table-entry m)))))))
|
|
321 | + (not (zerop (case-mapping-entry m)))))))
|
|
322 | 322 | |
323 | 323 | |
324 | 324 | (defun digit-char-p (char &optional (radix 10.))
|
... | ... | @@ -3335,7 +3335,7 @@ |
3335 | 3335 | (= (lisp::equal-char-code a)
|
3336 | 3336 | (lisp::equal-char-code b)))))
|
3337 | 3337 | |
3338 | -(deftransform char-upcase ((x) (character))
|
|
3338 | +(deftransform char-upcase ((x) (base-char))
|
|
3339 | 3339 | "open code"
|
3340 | 3340 | #-(and unicode (not unicode-bootstrap))
|
3341 | 3341 | '(if (lower-case-p x)
|
... | ... | @@ -3345,10 +3345,10 @@ |
3345 | 3345 | '(let ((m (char-code x)))
|
3346 | 3346 | (cond ((< 96 m 123) (code-char (- m 32)))
|
3347 | 3347 | ((> m lisp::+unicode-lower-limit+)
|
3348 | - (code-char (lisp::case-table-upper-case m)))
|
|
3348 | + (code-char (lisp::case-mapping-upper-case m)))
|
|
3349 | 3349 | (t x))))
|
3350 | 3350 | |
3351 | -(deftransform char-downcase ((x) (character))
|
|
3351 | +(deftransform char-downcase ((x) (base-char))
|
|
3352 | 3352 | "open code"
|
3353 | 3353 | #-(and unicode (not unicode-bootstrap))
|
3354 | 3354 | '(if (upper-case-p x)
|
... | ... | @@ -3358,7 +3358,7 @@ |
3358 | 3358 | '(let ((m (char-code x)))
|
3359 | 3359 | (cond ((< 64 m 91) (code-char (+ m 32)))
|
3360 | 3360 | ((> m lisp::+unicode-lower-limit+)
|
3361 | - (code-char (lisp::case-table-lower-case m)))
|
|
3361 | + (code-char (lisp::case-mapping-lower-case m)))
|
|
3362 | 3362 | (t x))))
|
3363 | 3363 | |
3364 | 3364 |
... | ... | @@ -33,7 +33,7 @@ FDLIBM = k_sin.c k_cos.c k_tan.c s_sin.c s_cos.c s_tan.c sincos.c \ |
33 | 33 | SRCS = lisp.c coreparse.c alloc.c monitor.c print.c interr.c \
|
34 | 34 | vars.c parse.c interrupt.c search.c validate.c globals.c \
|
35 | 35 | dynbind.c breakpoint.c regnames.c backtrace.c save.c purify.c \
|
36 | - runprog.c time.c case-table.c exec-init.c \
|
|
36 | + runprog.c time.c case-mapping.c exec-init.c \
|
|
37 | 37 | ${FDLIBM} ${ARCH_SRC} ${ASSEM_SRC} ${OS_SRC} ${GC_SRC}
|
38 | 38 | |
39 | 39 | OBJS = $(patsubst %.c,%.o,$(patsubst %.S,%.o,$(patsubst %.s,%.o,$(SRCS))))
|
1 | 1 | /*
|
2 | 2 | * DO NOT EDIT.
|
3 | 3 | *
|
4 | - * This was generated by (BUILD-CASE-TABLE 6) in
|
|
4 | + * This was generated by (BUILD-CASE-TABLE :stage2-size 6) in
|
|
5 | 5 | * src/tools/create-case-table.c.
|
6 | 6 | */
|
7 | 7 | |
... | ... | @@ -681,7 +681,7 @@ const uint32_t stage2_1021[64] = { |
681 | 681 | };
|
682 | 682 | |
683 | 683 | |
684 | -const uint32_t (*case_table[1024])[64] = {
|
|
684 | +const uint32_t (*case_mapping[1024])[64] = {
|
|
685 | 685 | &stage2_zeroes,
|
686 | 686 | &stage2_1,
|
687 | 687 | &stage2_zeroes,
|
1 | 1 | ;; Creates a table of tables that maps a lower case letter to an upper
|
2 | 2 | ;; case letter or an upper case letter to a lower case letter. This
|
3 | 3 | ;; mapping only works if the roundtrip casing returns the original
|
4 | -;; character, as required by CLHS.
|
|
4 | +;; character, as required by the standard.
|
|
5 | 5 | ;;
|
6 | 6 | ;; STAGE2-SIZE is the number of bits to used for the index of the
|
7 | 7 | ;; second stage table.
|
... | ... | @@ -126,10 +126,10 @@ |
126 | 126 | /*
|
127 | 127 | * DO NOT EDIT.
|
128 | 128 | *
|
129 | - * This was generated by (BUILD-CASE-TABLE ~D) in
|
|
129 | + * This was generated by (BUILD-CASE-TABLE :STAGE2-SIZE ~D) in
|
|
130 | 130 | * src/tools/create-case-table.c.
|
131 | 131 | */~2%"
|
132 | -stage2-size)
|
|
132 | + stage2-size)
|
|
133 | 133 | (format stream "#include <stdint.h>~%")
|
134 | 134 | (format stream "#include <stddef.h>~%")
|
135 | 135 | ;; First, dump the all-zeroes table
|
... | ... | @@ -144,7 +144,7 @@ stage2-size) |
144 | 144 | s2
|
145 | 145 | stream))
|
146 | 146 | ;; Now dump the stage1 table
|
147 | - (format stream "~2%const uint32_t (*case_table[~D])[~D] = {~%"
|
|
147 | + (format stream "~2%const uint32_t (*case_mapping[~D])[~D] = {~%"
|
|
148 | 148 | (length table)
|
149 | 149 | (length (aref table (position-if-not #'null table))))
|
150 | 150 | (loop for s2 across table
|
... | ... | @@ -156,6 +156,6 @@ stage2-size) |
156 | 156 | (format stream "};~%")
|
157 | 157 | (format t "Wrote ~S~%" (namestring stream))))
|
158 | 158 | |
159 | -(defun build-case-table (&key (stage2-size 6) (pathname "./src/lisp/case-table.c"))
|
|
159 | +(defun build-case-table (&key (stage2-size 6) (pathname "./src/lisp/case-mapping.c"))
|
|
160 | 160 | (let ((table (compute-case-table stage2-size)))
|
161 | 161 | (dump-case-table pathname table stage2-size))) |