Raymond Toy pushed to branch issue-322-optimize-case-mapping-size at cmucl / cmucl
Commits:
-
a3a3fbd9
by Raymond Toy at 2024-05-30T14:38:30+00:00
-
7e7f3f0f
by Raymond Toy at 2024-05-30T14:38:41+00:00
-
c6c3c9ce
by Raymond Toy at 2024-05-30T22:54:14+00:00
-
98afa3c8
by Raymond Toy at 2024-05-30T22:54:20+00:00
-
caf68d75
by Raymond Toy at 2024-05-30T16:01:11-07:00
-
4bd8ca89
by Raymond Toy at 2024-05-30T18:27:42-07:00
7 changed files:
- src/code/char.lisp
- src/i18n/locale/cmucl.pot
- src/lisp/case-mapping.c
- src/motif/server/datatrans.c
- src/motif/server/datatrans.h
- src/motif/server/tables.h
- + src/tools/create-case-mapping.lisp
Changes:
... | ... | @@ -73,8 +73,8 @@ |
73 | 73 | (alien:def-alien-variable "stage2"
|
74 | 74 | (alien:array c-call:unsigned-int nil))
|
75 | 75 | |
76 | -;; Each entry in the case table consists of the code for either an
|
|
77 | -;; upper case or lower case character code.
|
|
76 | +;; Each entry in the case mapping table consists of the code for
|
|
77 | +;; either an upper case or lower case character code.
|
|
78 | 78 | (defconstant +upper-case-entry+ (byte 16 0))
|
79 | 79 | (defconstant +lower-case-entry+ (byte 16 16))
|
80 | 80 | |
... | ... | @@ -84,9 +84,9 @@ |
84 | 84 | |
85 | 85 | (declaim (inline case-mapping-entry))
|
86 | 86 | (defun case-mapping-entry (code)
|
87 | - "For the character code, CODE, return 0 or the 32-bit value from the
|
|
88 | - case table. A value of 0 means there was no case mapping (neither
|
|
89 | - upper nor lower case)."
|
|
87 | + "For the character code, CODE, the 32-bit value from the
|
|
88 | + case mapping table that indicates the delta between CODE and the
|
|
89 | + corresponding upper or lower case character for CODE."
|
|
90 | 90 | (declare (type (integer 0 (#.char-code-limit)) code)
|
91 | 91 | (optimize (speed 3) (safety 0)))
|
92 | 92 | (let* ((index1 (ldb (byte (- 16 +stage2-size+) +stage2-size+)
|
... | ... | @@ -94,7 +94,7 @@ |
94 | 94 | (index2 (ldb (byte +stage2-size+ 0)
|
95 | 95 | code))
|
96 | 96 | (stage2-offset (alien:deref case-mapping index1)))
|
97 | - (alien:deref stage2 (+ (* stage2-offset index2)))))
|
|
97 | + (alien:deref stage2 (+ stage2-offset index2))))
|
|
98 | 98 | |
99 | 99 | (declaim (inline case-mapping-lower-case))
|
100 | 100 | (defun case-mapping-lower-case (code)
|
... | ... | @@ -5457,9 +5457,9 @@ msgstr "" |
5457 | 5457 | |
5458 | 5458 | #: src/code/char.lisp
|
5459 | 5459 | msgid ""
|
5460 | -"For the character code, CODE, return 0 or the 32-bit value from the\n"
|
|
5461 | -" case table. A value of 0 means there was no case mapping (neither\n"
|
|
5462 | -" upper nor lower case)."
|
|
5460 | +"For the character code, CODE, the 32-bit value from the\n"
|
|
5461 | +" case mapping table that indicates the delta between CODE and the\n"
|
|
5462 | +" corresponding upper or lower case character for CODE."
|
|
5463 | 5463 | msgstr ""
|
5464 | 5464 | |
5465 | 5465 | #: src/code/char.lisp
|
1 | 1 | /*
|
2 | 2 | * DO NOT EDIT.
|
3 | 3 | *
|
4 | - * This was generated by (BUILD-CASE-TABLE :STAGE2-SIZE 6) in
|
|
5 | - * src/tools/create-case-table.c.
|
|
4 | + * This was generated by (BUILD-CASE-MAPPING-TABLE :STAGE2-SIZE 6) in
|
|
5 | + * src/tools/create-case-mapping.lisp.
|
|
6 | 6 | */
|
7 | 7 | |
8 | 8 | #include <stdint.h>
|
... | ... | @@ -265,10 +265,10 @@ void message_write_color(message_t m,XColor *color,int tag) |
265 | 265 | message_put_word(m,color->blue);
|
266 | 266 | }
|
267 | 267 | |
268 | -void message_write_float(message_t m,float f,int tag)
|
|
268 | +void message_write_float(message_t m,void *f,int tag)
|
|
269 | 269 | {
|
270 | 270 | message_put_dblword(m,combine_type_and_data(tag,0));
|
271 | - message_put_dblword(m,f);
|
|
271 | + message_put_dblword(m,*(long *) f);
|
|
272 | 272 | }
|
273 | 273 | |
274 | 274 |
|
... | ... | @@ -524,8 +524,8 @@ void message_read_color(message_t m,XColor *color,int tag, int red) |
524 | 524 | |
525 | 525 | void message_read_float(message_t m,float *f,int tag,int data)
|
526 | 526 | {
|
527 | - fprintf(stderr,">>>>> Warning:message_read_float: Not implemented.\n");
|
|
528 | - fflush(stderr);
|
|
527 | + long d = message_get_dblword(m);
|
|
528 | + memcpy(f, &d, sizeof(*f));
|
|
529 | 529 | }
|
530 | 530 | |
531 | 531 |
|
... | ... | @@ -38,7 +38,7 @@ extern void message_write_int_list(); |
38 | 38 | extern void message_write_event();
|
39 | 39 | extern void message_write_color();
|
40 | 40 | /* GCC complains without the full prototype */
|
41 | -extern void message_write_float(message_t,float,int);
|
|
41 | +extern void message_write_float(message_t,void *,int);
|
|
42 | 42 | |
43 | 43 |
|
44 | 44 |
... | ... | @@ -10,8 +10,8 @@ |
10 | 10 | #ifndef TABLES_H
|
11 | 11 | #define TABLES_H
|
12 | 12 | |
13 | -typedef void (*type_writer)(message_t out,caddr_t src,int type_tag);
|
|
14 | -typedef void (*type_reader)(message_t in,caddr_t dest,int type_tag,int data);
|
|
13 | +typedef void (*type_writer)(message_t out,void *src,int type_tag);
|
|
14 | +typedef void (*type_reader)(message_t in,void *dest,int type_tag,int data);
|
|
15 | 15 | |
16 | 16 | typedef struct {
|
17 | 17 | String type;
|
1 | +;; Creates a table of tables that maps a lower case letter to an upper
|
|
2 | +;; case letter or an upper case letter to a lower case letter. This
|
|
3 | +;; mapping only works if the roundtrip casing returns the original
|
|
4 | +;; character, as required by the standard.
|
|
5 | +;;
|
|
6 | +;; STAGE2-SIZE is the number of bits to used for the index of the
|
|
7 | +;; second stage table.
|
|
8 | +;;
|
|
9 | +;; Let C be a 16-bit character code. C is decomposed into two parts.
|
|
10 | +;; The high bits are used as the index into the first table, and the
|
|
11 | +;; low bits are used as the index into the second table. The number
|
|
12 | +;; of low bits is STAGE2-SIZE.
|
|
13 | +;;
|
|
14 | +;; If the second stage table is all zeroes, the table is replaced by
|
|
15 | +;; NIL since it contains no valid mapping of lower or upper case
|
|
16 | +;; letters.
|
|
17 | +;;
|
|
18 | +;; Each element of this table is 32-bits long. The low 16 bits
|
|
19 | +;; contains the mapping of C to the corresponding upper case letter.
|
|
20 | +;; The high 16 bits maps C to the corresponding lower case letter.
|
|
21 | +(defun compute-case-mapping-table (stage2-size)
|
|
22 | + (let ((table (make-array (ash 1 (- 16 stage2-size)))))
|
|
23 | + (dotimes (i (length table))
|
|
24 | + (setf (aref table i) (make-array (ash 1 stage2-size)
|
|
25 | + :initial-element 0
|
|
26 | + :element-type '(unsigned-byte 32))))
|
|
27 | + (dotimes (i char-code-limit)
|
|
28 | + (let ((stage1 (ldb (byte (- 16 stage2-size) stage2-size) i))
|
|
29 | + (stage2 (ldb (byte stage2-size 0) i)))
|
|
30 | + (let ((upper (lisp::unicode-upper i))
|
|
31 | + (lower (lisp::unicode-lower i))
|
|
32 | + (entry 0))
|
|
33 | + (declare (type (unsigned-byte 32) entry))
|
|
34 | + |
|
35 | + (assert (< upper char-code-limit))
|
|
36 | + (assert (< lower char-code-limit))
|
|
37 | +
|
|
38 | + ;; Compute mapping from lower case to upper case which is
|
|
39 | + ;; stored in the low 16 bits of the stage2 table.
|
|
40 | + ;;
|
|
41 | + ;; Only consider characters that have an upper case letter and
|
|
42 | + ;; whose lowercase version returns the original letter.
|
|
43 | + (when (and (/= i upper)
|
|
44 | + (= i (lisp::unicode-lower upper)))
|
|
45 | + (setf entry (ldb (byte 16 0) (- i upper))))
|
|
46 | + ;; Compute mapping from upper case to lower case which is
|
|
47 | + ;; stored in the high 16 bits ofthe stage2 table.
|
|
48 | + ;;
|
|
49 | + ;; Only consider characters that have a lower case letter and
|
|
50 | + ;; whose upper case version returns the original letter.
|
|
51 | + (when (and (/= i lower)
|
|
52 | + (= i (lisp::unicode-upper lower)))
|
|
53 | + (setf entry (ash (ldb (byte 16 0) (- i lower))
|
|
54 | + 16)))
|
|
55 | + |
|
56 | + ;; Note: the entry can only contain a lower case code or an
|
|
57 | + ;; upper case code, not both because we a character is
|
|
58 | + ;; either lower case or upper case and not both at the same
|
|
59 | + ;; time.
|
|
60 | + (setf (aref (aref table stage1) stage2)
|
|
61 | + entry))))
|
|
62 | + |
|
63 | + ;; Find each stage2 table that is all zeroes and replace it with
|
|
64 | + ;; NIL.
|
|
65 | + (dotimes (k (length table))
|
|
66 | + (let ((empty (count-if-not #'zerop (aref table k))))
|
|
67 | + (when (zerop empty)
|
|
68 | + (setf (aref table k) nil))))
|
|
69 | + table))
|
|
70 | + |
|
71 | +;; Given a case-mapping table TABLE, print some information about the
|
|
72 | +;; size of the tables. This includes the number of empty and
|
|
73 | +;; non-empty stage2 tables. Also print out how many total non-NIL
|
|
74 | +;; entries are needed. This is proportional to the total amount of
|
|
75 | +;; memory needed to store all the tables.
|
|
76 | +(defun print-table-stats (table stage2-size)
|
|
77 | + (let ((stage1-size (length table))
|
|
78 | + (stage2 (loop for v across table
|
|
79 | + when v
|
|
80 | + sum (length v)))
|
|
81 | + (empty (count-if #'null table)))
|
|
82 | + (format t "stage2-size ~D~%" stage2-size)
|
|
83 | + (format t " stage1 entries: ~D: " stage1-size)
|
|
84 | + (format t " ~D non-empty ~D empty~%" (- stage1-size empty) empty)
|
|
85 | + (format t " stage2 entries: ~D (length ~D)~%"
|
|
86 | + stage2 (ash 1 stage2-size))
|
|
87 | + (format t " total : ~D~%" (+ (length table) stage2))
|
|
88 | + (+ (length table) stage2)))
|
|
89 | + |
|
90 | +(defun find-optimum-size ()
|
|
91 | + (let ((results
|
|
92 | + (first
|
|
93 | + (sort (loop for stage2-size from 1 to 15
|
|
94 | + collect (list stage2-size
|
|
95 | + (print-table-stats
|
|
96 | + (compute-case-mapping-table stage2-size)
|
|
97 | + stage2-size)))
|
|
98 | + #'<
|
|
99 | + :key #'second))))
|
|
100 | + (format t "Optimum table size: stage2-size ~D, space ~D~%"
|
|
101 | + (first results)
|
|
102 | + (second results))))
|
|
103 | + |
|
104 | +;; Print the case table TABLE to a file named by PATHNAME.
|
|
105 | +(defun dump-case-mapping-table (pathname table stage2-size)
|
|
106 | + ;; The first entry in the table MUST be NIL because we use that as
|
|
107 | + ;; the all-zeroes array because of the sparse entries in the table.
|
|
108 | + (assert (null (aref table 0)))
|
|
109 | + |
|
110 | + (with-open-file (stream pathname :direction :output :if-exists :supersede)
|
|
111 | + (format stream
|
|
112 | + "~
|
|
113 | +/*
|
|
114 | + * DO NOT EDIT.
|
|
115 | + *
|
|
116 | + * This was generated by (BUILD-CASE-MAPPING-TABLE :STAGE2-SIZE ~D) in
|
|
117 | + * src/tools/create-case-mapping.lisp.
|
|
118 | + */~2%"
|
|
119 | + stage2-size)
|
|
120 | + (format stream "#include <stdint.h>~%")
|
|
121 | + (format stream "#include <stddef.h>~%")
|
|
122 | + (format stream "~2%const uint32_t stage2[] = {~%")
|
|
123 | + (flet ((print-table (header table stream)
|
|
124 | + ;; Neatly print the table TABLE to STREAM. Each table is
|
|
125 | + ;; preceded by a C comment in HEADER. The entries are
|
|
126 | + ;; printed in hex, and neatly wrapped.
|
|
127 | + (format stream "/* ~A */" header)
|
|
128 | + (pprint-newline :mandatory stream)
|
|
129 | + (dotimes (n (length table))
|
|
130 | + (unless (zerop n)
|
|
131 | + (write-char #\, stream)
|
|
132 | + (write-char #\space stream)
|
|
133 | + (pprint-newline :fill stream))
|
|
134 | + ;;(pprint-pop)
|
|
135 | + (format stream "0x~8,'0x" (aref table n)))
|
|
136 | + (princ #\, stream)
|
|
137 | + (pprint-newline :mandatory stream)))
|
|
138 | + (let ((index 0)
|
|
139 | + offsets)
|
|
140 | + (pprint-logical-block (stream nil :prefix " ")
|
|
141 | + (print-table "zeroes"
|
|
142 | + (make-array (ash 1 stage2-size)
|
|
143 | + :initial-element 0)
|
|
144 | + stream)
|
|
145 | + (loop for k from 0
|
|
146 | + for s2 across table
|
|
147 | + when s2
|
|
148 | + do (progn
|
|
149 | + (incf index (ash 1 stage2-size))
|
|
150 | + (push index offsets)
|
|
151 | + (print-table (format nil "stage2_~D (offset ~D)" k index)
|
|
152 | + s2
|
|
153 | + stream))))
|
|
154 | + (format stream "};~%")
|
|
155 | +
|
|
156 | + ;; Now dump the stage1 table
|
|
157 | + (format stream "~2%const uint16_t case_mapping[~D] = {~%"
|
|
158 | + (length table))
|
|
159 | + (setf offsets (nreverse offsets))
|
|
160 | + (loop for s2 across table
|
|
161 | + for k from 0
|
|
162 | + if s2
|
|
163 | + do (format stream " 0x~4,'0x, /* stage2_~D */~%"
|
|
164 | + (pop offsets)
|
|
165 | + k)
|
|
166 | + else
|
|
167 | + do (format stream " 0x~4,'0x,~%"
|
|
168 | + 0))
|
|
169 | + (format stream "};~%")
|
|
170 | + (format t "Wrote ~S~%" (namestring stream))))))
|
|
171 | + |
|
172 | +(defun build-case-mapping-table (&key (stage2-size 6) (pathname "./src/lisp/case-mapping.c"))
|
|
173 | + (let ((table (compute-case-mapping-table stage2-size)))
|
|
174 | + (dump-case-mapping-table pathname table stage2-size))) |