Raymond Toy pushed to branch issue-322-optimize-case-mapping-size at cmucl / cmucl
Commits: a3a3fbd9 by Raymond Toy at 2024-05-30T14:38:30+00:00 Fix #312: Use correct type for message_write_float
- - - - - 7e7f3f0f by Raymond Toy at 2024-05-30T14:38:41+00:00 Merge branch 'issue-312-motif-server' into 'master'
Fix #312: Use correct type for message_write_float
Closes #312
See merge request cmucl/cmucl!218 - - - - - c6c3c9ce by Raymond Toy at 2024-05-30T22:54:14+00:00 Fix #316: Support roundtrip character casing
- - - - - 98afa3c8 by Raymond Toy at 2024-05-30T22:54:20+00:00 Merge branch 'issue-316-support-roundtrip-char-casing' into 'master'
Fix #316: Support roundtrip character casing
Closes #316
See merge request cmucl/cmucl!220 - - - - - caf68d75 by Raymond Toy at 2024-05-30T16:01:11-07:00 Merge branch 'master' into issue-322-optimize-case-mapping-size
- - - - - 4bd8ca89 by Raymond Toy at 2024-05-30T18:27:42-07:00 Fix up merge mistakes
- - - - -
7 changed files:
- src/code/char.lisp - src/i18n/locale/cmucl.pot - src/lisp/case-mapping.c - src/motif/server/datatrans.c - src/motif/server/datatrans.h - src/motif/server/tables.h - + src/tools/create-case-mapping.lisp
Changes:
===================================== src/code/char.lisp ===================================== @@ -73,8 +73,8 @@ (alien:def-alien-variable "stage2" (alien:array c-call:unsigned-int nil))
-;; Each entry in the case table consists of the code for either an -;; upper case or lower case character code. +;; Each entry in the case mapping table consists of the code for +;; either an upper case or lower case character code. (defconstant +upper-case-entry+ (byte 16 0)) (defconstant +lower-case-entry+ (byte 16 16))
@@ -84,9 +84,9 @@
(declaim (inline case-mapping-entry)) (defun case-mapping-entry (code) - "For the character code, CODE, return 0 or the 32-bit value from the - case table. A value of 0 means there was no case mapping (neither - upper nor lower case)." + "For the character code, CODE, the 32-bit value from the + case mapping table that indicates the delta between CODE and the + corresponding upper or lower case character for CODE." (declare (type (integer 0 (#.char-code-limit)) code) (optimize (speed 3) (safety 0))) (let* ((index1 (ldb (byte (- 16 +stage2-size+) +stage2-size+) @@ -94,7 +94,7 @@ (index2 (ldb (byte +stage2-size+ 0) code)) (stage2-offset (alien:deref case-mapping index1))) - (alien:deref stage2 (+ (* stage2-offset index2))))) + (alien:deref stage2 (+ stage2-offset index2))))
(declaim (inline case-mapping-lower-case)) (defun case-mapping-lower-case (code)
===================================== src/i18n/locale/cmucl.pot ===================================== @@ -5457,9 +5457,9 @@ msgstr ""
#: src/code/char.lisp msgid "" -"For the character code, CODE, return 0 or the 32-bit value from the\n" -" case table. A value of 0 means there was no case mapping (neither\n" -" upper nor lower case)." +"For the character code, CODE, the 32-bit value from the\n" +" case mapping table that indicates the delta between CODE and the\n" +" corresponding upper or lower case character for CODE." msgstr ""
#: src/code/char.lisp
===================================== src/lisp/case-mapping.c ===================================== @@ -1,8 +1,8 @@ /* * DO NOT EDIT. * - * This was generated by (BUILD-CASE-TABLE :STAGE2-SIZE 6) in - * src/tools/create-case-table.c. + * This was generated by (BUILD-CASE-MAPPING-TABLE :STAGE2-SIZE 6) in + * src/tools/create-case-mapping.lisp. */
#include <stdint.h>
===================================== src/motif/server/datatrans.c ===================================== @@ -265,10 +265,10 @@ void message_write_color(message_t m,XColor *color,int tag) message_put_word(m,color->blue); }
-void message_write_float(message_t m,float f,int tag) +void message_write_float(message_t m,void *f,int tag) { message_put_dblword(m,combine_type_and_data(tag,0)); - message_put_dblword(m,f); + message_put_dblword(m,*(long *) f); }
@@ -524,8 +524,8 @@ void message_read_color(message_t m,XColor *color,int tag, int red)
void message_read_float(message_t m,float *f,int tag,int data) { - fprintf(stderr,">>>>> Warning:message_read_float: Not implemented.\n"); - fflush(stderr); + long d = message_get_dblword(m); + memcpy(f, &d, sizeof(*f)); }
===================================== src/motif/server/datatrans.h ===================================== @@ -38,7 +38,7 @@ extern void message_write_int_list(); extern void message_write_event(); extern void message_write_color(); /* GCC complains without the full prototype */ -extern void message_write_float(message_t,float,int); +extern void message_write_float(message_t,void *,int);
===================================== src/motif/server/tables.h ===================================== @@ -10,8 +10,8 @@ #ifndef TABLES_H #define TABLES_H
-typedef void (*type_writer)(message_t out,caddr_t src,int type_tag); -typedef void (*type_reader)(message_t in,caddr_t dest,int type_tag,int data); +typedef void (*type_writer)(message_t out,void *src,int type_tag); +typedef void (*type_reader)(message_t in,void *dest,int type_tag,int data);
typedef struct { String type;
===================================== src/tools/create-case-mapping.lisp ===================================== @@ -0,0 +1,174 @@ +;; Creates a table of tables that maps a lower case letter to an upper +;; case letter or an upper case letter to a lower case letter. This +;; mapping only works if the roundtrip casing returns the original +;; character, as required by the standard. +;; +;; STAGE2-SIZE is the number of bits to used for the index of the +;; second stage table. +;; +;; Let C be a 16-bit character code. C is decomposed into two parts. +;; The high bits are used as the index into the first table, and the +;; low bits are used as the index into the second table. The number +;; of low bits is STAGE2-SIZE. +;; +;; If the second stage table is all zeroes, the table is replaced by +;; NIL since it contains no valid mapping of lower or upper case +;; letters. +;; +;; Each element of this table is 32-bits long. The low 16 bits +;; contains the mapping of C to the corresponding upper case letter. +;; The high 16 bits maps C to the corresponding lower case letter. +(defun compute-case-mapping-table (stage2-size) + (let ((table (make-array (ash 1 (- 16 stage2-size))))) + (dotimes (i (length table)) + (setf (aref table i) (make-array (ash 1 stage2-size) + :initial-element 0 + :element-type '(unsigned-byte 32)))) + (dotimes (i char-code-limit) + (let ((stage1 (ldb (byte (- 16 stage2-size) stage2-size) i)) + (stage2 (ldb (byte stage2-size 0) i))) + (let ((upper (lisp::unicode-upper i)) + (lower (lisp::unicode-lower i)) + (entry 0)) + (declare (type (unsigned-byte 32) entry)) + + (assert (< upper char-code-limit)) + (assert (< lower char-code-limit)) + + ;; Compute mapping from lower case to upper case which is + ;; stored in the low 16 bits of the stage2 table. + ;; + ;; Only consider characters that have an upper case letter and + ;; whose lowercase version returns the original letter. + (when (and (/= i upper) + (= i (lisp::unicode-lower upper))) + (setf entry (ldb (byte 16 0) (- i upper)))) + ;; Compute mapping from upper case to lower case which is + ;; stored in the high 16 bits ofthe stage2 table. + ;; + ;; Only consider characters that have a lower case letter and + ;; whose upper case version returns the original letter. + (when (and (/= i lower) + (= i (lisp::unicode-upper lower))) + (setf entry (ash (ldb (byte 16 0) (- i lower)) + 16))) + + ;; Note: the entry can only contain a lower case code or an + ;; upper case code, not both because we a character is + ;; either lower case or upper case and not both at the same + ;; time. + (setf (aref (aref table stage1) stage2) + entry)))) + + ;; Find each stage2 table that is all zeroes and replace it with + ;; NIL. + (dotimes (k (length table)) + (let ((empty (count-if-not #'zerop (aref table k)))) + (when (zerop empty) + (setf (aref table k) nil)))) + table)) + +;; Given a case-mapping table TABLE, print some information about the +;; size of the tables. This includes the number of empty and +;; non-empty stage2 tables. Also print out how many total non-NIL +;; entries are needed. This is proportional to the total amount of +;; memory needed to store all the tables. +(defun print-table-stats (table stage2-size) + (let ((stage1-size (length table)) + (stage2 (loop for v across table + when v + sum (length v))) + (empty (count-if #'null table))) + (format t "stage2-size ~D~%" stage2-size) + (format t " stage1 entries: ~D: " stage1-size) + (format t " ~D non-empty ~D empty~%" (- stage1-size empty) empty) + (format t " stage2 entries: ~D (length ~D)~%" + stage2 (ash 1 stage2-size)) + (format t " total : ~D~%" (+ (length table) stage2)) + (+ (length table) stage2))) + +(defun find-optimum-size () + (let ((results + (first + (sort (loop for stage2-size from 1 to 15 + collect (list stage2-size + (print-table-stats + (compute-case-mapping-table stage2-size) + stage2-size))) + #'< + :key #'second)))) + (format t "Optimum table size: stage2-size ~D, space ~D~%" + (first results) + (second results)))) + +;; Print the case table TABLE to a file named by PATHNAME. +(defun dump-case-mapping-table (pathname table stage2-size) + ;; The first entry in the table MUST be NIL because we use that as + ;; the all-zeroes array because of the sparse entries in the table. + (assert (null (aref table 0))) + + (with-open-file (stream pathname :direction :output :if-exists :supersede) + (format stream + "~ +/* + * DO NOT EDIT. + * + * This was generated by (BUILD-CASE-MAPPING-TABLE :STAGE2-SIZE ~D) in + * src/tools/create-case-mapping.lisp. + */~2%" + stage2-size) + (format stream "#include <stdint.h>~%") + (format stream "#include <stddef.h>~%") + (format stream "~2%const uint32_t stage2[] = {~%") + (flet ((print-table (header table stream) + ;; Neatly print the table TABLE to STREAM. Each table is + ;; preceded by a C comment in HEADER. The entries are + ;; printed in hex, and neatly wrapped. + (format stream "/* ~A */" header) + (pprint-newline :mandatory stream) + (dotimes (n (length table)) + (unless (zerop n) + (write-char #, stream) + (write-char #\space stream) + (pprint-newline :fill stream)) + ;;(pprint-pop) + (format stream "0x~8,'0x" (aref table n))) + (princ #, stream) + (pprint-newline :mandatory stream))) + (let ((index 0) + offsets) + (pprint-logical-block (stream nil :prefix " ") + (print-table "zeroes" + (make-array (ash 1 stage2-size) + :initial-element 0) + stream) + (loop for k from 0 + for s2 across table + when s2 + do (progn + (incf index (ash 1 stage2-size)) + (push index offsets) + (print-table (format nil "stage2_~D (offset ~D)" k index) + s2 + stream)))) + (format stream "};~%") + + ;; Now dump the stage1 table + (format stream "~2%const uint16_t case_mapping[~D] = {~%" + (length table)) + (setf offsets (nreverse offsets)) + (loop for s2 across table + for k from 0 + if s2 + do (format stream " 0x~4,'0x, /* stage2_~D */~%" + (pop offsets) + k) + else + do (format stream " 0x~4,'0x,~%" + 0)) + (format stream "};~%") + (format t "Wrote ~S~%" (namestring stream)))))) + +(defun build-case-mapping-table (&key (stage2-size 6) (pathname "./src/lisp/case-mapping.c")) + (let ((table (compute-case-mapping-table stage2-size))) + (dump-case-mapping-table pathname table stage2-size)))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/0ff3a9659d487a93fffb7d0...