|
|
1
|
+;; Creates a table of tables that maps a lower case letter to an upper
|
|
|
2
|
+;; case letter or an upper case letter to a lower case letter. This
|
|
|
3
|
+;; mapping only works if the roundtrip casing returns the original
|
|
|
4
|
+;; character, as required by the standard.
|
|
|
5
|
+;;
|
|
|
6
|
+;; STAGE2-SIZE is the number of bits to used for the index of the
|
|
|
7
|
+;; second stage table.
|
|
|
8
|
+;;
|
|
|
9
|
+;; Let C be a 16-bit character code. C is decomposed into two parts.
|
|
|
10
|
+;; The high bits are used as the index into the first table, and the
|
|
|
11
|
+;; low bits are used as the index into the second table. The number
|
|
|
12
|
+;; of low bits is STAGE2-SIZE.
|
|
|
13
|
+;;
|
|
|
14
|
+;; If the second stage table is all zeroes, the table is replaced by
|
|
|
15
|
+;; NIL since it contains no valid mapping of lower or upper case
|
|
|
16
|
+;; letters.
|
|
|
17
|
+;;
|
|
|
18
|
+;; Each element of this table is 32-bits long. The low 16 bits
|
|
|
19
|
+;; contains the mapping of C to the corresponding upper case letter.
|
|
|
20
|
+;; The high 16 bits maps C to the corresponding lower case letter.
|
|
|
21
|
+(defun compute-case-mapping-table (stage2-size)
|
|
|
22
|
+ (let ((table (make-array (ash 1 (- 16 stage2-size)))))
|
|
|
23
|
+ (dotimes (i (length table))
|
|
|
24
|
+ (setf (aref table i) (make-array (ash 1 stage2-size)
|
|
|
25
|
+ :initial-element 0
|
|
|
26
|
+ :element-type '(unsigned-byte 32))))
|
|
|
27
|
+ (dotimes (i char-code-limit)
|
|
|
28
|
+ (let ((stage1 (ldb (byte (- 16 stage2-size) stage2-size) i))
|
|
|
29
|
+ (stage2 (ldb (byte stage2-size 0) i)))
|
|
|
30
|
+ (let ((upper (lisp::unicode-upper i))
|
|
|
31
|
+ (lower (lisp::unicode-lower i))
|
|
|
32
|
+ (entry 0))
|
|
|
33
|
+ (declare (type (unsigned-byte 32) entry))
|
|
|
34
|
+
|
|
|
35
|
+ (assert (< upper char-code-limit))
|
|
|
36
|
+ (assert (< lower char-code-limit))
|
|
|
37
|
+
|
|
|
38
|
+ ;; Compute mapping from lower case to upper case which is
|
|
|
39
|
+ ;; stored in the low 16 bits of the stage2 table.
|
|
|
40
|
+ ;;
|
|
|
41
|
+ ;; Only consider characters that have an upper case letter and
|
|
|
42
|
+ ;; whose lowercase version returns the original letter.
|
|
|
43
|
+ (when (and (/= i upper)
|
|
|
44
|
+ (= i (lisp::unicode-lower upper)))
|
|
|
45
|
+ (setf entry (ldb (byte 16 0) (- i upper))))
|
|
|
46
|
+ ;; Compute mapping from upper case to lower case which is
|
|
|
47
|
+ ;; stored in the high 16 bits ofthe stage2 table.
|
|
|
48
|
+ ;;
|
|
|
49
|
+ ;; Only consider characters that have a lower case letter and
|
|
|
50
|
+ ;; whose upper case version returns the original letter.
|
|
|
51
|
+ (when (and (/= i lower)
|
|
|
52
|
+ (= i (lisp::unicode-upper lower)))
|
|
|
53
|
+ (setf entry (ash (ldb (byte 16 0) (- i lower))
|
|
|
54
|
+ 16)))
|
|
|
55
|
+
|
|
|
56
|
+ ;; Note: the entry can only contain a lower case code or an
|
|
|
57
|
+ ;; upper case code, not both because we a character is
|
|
|
58
|
+ ;; either lower case or upper case and not both at the same
|
|
|
59
|
+ ;; time.
|
|
|
60
|
+ (setf (aref (aref table stage1) stage2)
|
|
|
61
|
+ entry))))
|
|
|
62
|
+
|
|
|
63
|
+ ;; Find each stage2 table that is all zeroes and replace it with
|
|
|
64
|
+ ;; NIL.
|
|
|
65
|
+ (dotimes (k (length table))
|
|
|
66
|
+ (let ((empty (count-if-not #'zerop (aref table k))))
|
|
|
67
|
+ (when (zerop empty)
|
|
|
68
|
+ (setf (aref table k) nil))))
|
|
|
69
|
+ table))
|
|
|
70
|
+
|
|
|
71
|
+;; Given a case-mapping table TABLE, print some information about the
|
|
|
72
|
+;; size of the tables. This includes the number of empty and
|
|
|
73
|
+;; non-empty stage2 tables. Also print out how many total non-NIL
|
|
|
74
|
+;; entries are needed. This is proportional to the total amount of
|
|
|
75
|
+;; memory needed to store all the tables.
|
|
|
76
|
+(defun print-table-stats (table stage2-size)
|
|
|
77
|
+ (let ((stage1-size (length table))
|
|
|
78
|
+ (stage2 (loop for v across table
|
|
|
79
|
+ when v
|
|
|
80
|
+ sum (length v)))
|
|
|
81
|
+ (empty (count-if #'null table)))
|
|
|
82
|
+ (format t "stage2-size ~D~%" stage2-size)
|
|
|
83
|
+ (format t " stage1 entries: ~D: " stage1-size)
|
|
|
84
|
+ (format t " ~D non-empty ~D empty~%" (- stage1-size empty) empty)
|
|
|
85
|
+ (format t " stage2 entries: ~D (length ~D)~%"
|
|
|
86
|
+ stage2 (ash 1 stage2-size))
|
|
|
87
|
+ (format t " total : ~D~%" (+ (length table) stage2))
|
|
|
88
|
+ (+ (length table) stage2)))
|
|
|
89
|
+
|
|
|
90
|
+(defun find-optimum-size ()
|
|
|
91
|
+ (let ((results
|
|
|
92
|
+ (first
|
|
|
93
|
+ (sort (loop for stage2-size from 1 to 15
|
|
|
94
|
+ collect (list stage2-size
|
|
|
95
|
+ (print-table-stats
|
|
|
96
|
+ (compute-case-mapping-table stage2-size)
|
|
|
97
|
+ stage2-size)))
|
|
|
98
|
+ #'<
|
|
|
99
|
+ :key #'second))))
|
|
|
100
|
+ (format t "Optimum table size: stage2-size ~D, space ~D~%"
|
|
|
101
|
+ (first results)
|
|
|
102
|
+ (second results))))
|
|
|
103
|
+
|
|
|
104
|
+;; Print the case table TABLE to a file named by PATHNAME.
|
|
|
105
|
+(defun dump-case-mapping-table (pathname table stage2-size)
|
|
|
106
|
+ ;; The first entry in the table MUST be NIL because we use that as
|
|
|
107
|
+ ;; the all-zeroes array because of the sparse entries in the table.
|
|
|
108
|
+ (assert (null (aref table 0)))
|
|
|
109
|
+
|
|
|
110
|
+ (with-open-file (stream pathname :direction :output :if-exists :supersede)
|
|
|
111
|
+ (format stream
|
|
|
112
|
+ "~
|
|
|
113
|
+/*
|
|
|
114
|
+ * DO NOT EDIT.
|
|
|
115
|
+ *
|
|
|
116
|
+ * This was generated by (BUILD-CASE-MAPPING-TABLE :STAGE2-SIZE ~D) in
|
|
|
117
|
+ * src/tools/create-case-mapping.lisp.
|
|
|
118
|
+ */~2%"
|
|
|
119
|
+ stage2-size)
|
|
|
120
|
+ (format stream "#include <stdint.h>~%")
|
|
|
121
|
+ (format stream "#include <stddef.h>~%")
|
|
|
122
|
+ (format stream "~2%const uint32_t stage2[] = {~%")
|
|
|
123
|
+ (flet ((print-table (header table stream)
|
|
|
124
|
+ ;; Neatly print the table TABLE to STREAM. Each table is
|
|
|
125
|
+ ;; preceded by a C comment in HEADER. The entries are
|
|
|
126
|
+ ;; printed in hex, and neatly wrapped.
|
|
|
127
|
+ (format stream "/* ~A */" header)
|
|
|
128
|
+ (pprint-newline :mandatory stream)
|
|
|
129
|
+ (dotimes (n (length table))
|
|
|
130
|
+ (unless (zerop n)
|
|
|
131
|
+ (write-char #\, stream)
|
|
|
132
|
+ (write-char #\space stream)
|
|
|
133
|
+ (pprint-newline :fill stream))
|
|
|
134
|
+ ;;(pprint-pop)
|
|
|
135
|
+ (format stream "0x~8,'0x" (aref table n)))
|
|
|
136
|
+ (princ #\, stream)
|
|
|
137
|
+ (pprint-newline :mandatory stream)))
|
|
|
138
|
+ (let ((index 0)
|
|
|
139
|
+ offsets)
|
|
|
140
|
+ (pprint-logical-block (stream nil :prefix " ")
|
|
|
141
|
+ (print-table "zeroes"
|
|
|
142
|
+ (make-array (ash 1 stage2-size)
|
|
|
143
|
+ :initial-element 0)
|
|
|
144
|
+ stream)
|
|
|
145
|
+ (loop for k from 0
|
|
|
146
|
+ for s2 across table
|
|
|
147
|
+ when s2
|
|
|
148
|
+ do (progn
|
|
|
149
|
+ (incf index (ash 1 stage2-size))
|
|
|
150
|
+ (push index offsets)
|
|
|
151
|
+ (print-table (format nil "stage2_~D (offset ~D)" k index)
|
|
|
152
|
+ s2
|
|
|
153
|
+ stream))))
|
|
|
154
|
+ (format stream "};~%")
|
|
|
155
|
+
|
|
|
156
|
+ ;; Now dump the stage1 table
|
|
|
157
|
+ (format stream "~2%const uint16_t case_mapping[~D] = {~%"
|
|
|
158
|
+ (length table))
|
|
|
159
|
+ (setf offsets (nreverse offsets))
|
|
|
160
|
+ (loop for s2 across table
|
|
|
161
|
+ for k from 0
|
|
|
162
|
+ if s2
|
|
|
163
|
+ do (format stream " 0x~4,'0x, /* stage2_~D */~%"
|
|
|
164
|
+ (pop offsets)
|
|
|
165
|
+ k)
|
|
|
166
|
+ else
|
|
|
167
|
+ do (format stream " 0x~4,'0x,~%"
|
|
|
168
|
+ 0))
|
|
|
169
|
+ (format stream "};~%")
|
|
|
170
|
+ (format t "Wrote ~S~%" (namestring stream))))))
|
|
|
171
|
+
|
|
|
172
|
+(defun build-case-mapping-table (&key (stage2-size 6) (pathname "./src/lisp/case-mapping.c"))
|
|
|
173
|
+ (let ((table (compute-case-mapping-table stage2-size)))
|
|
|
174
|
+ (dump-case-mapping-table pathname table stage2-size))) |