Raymond Toy pushed to branch issue-316-support-roundtrip-char-casing at cmucl / cmucl
Commits: 7c1b16c8 by Raymond Toy at 2024-05-22T07:34:06-07:00 Add comments and clean up code.
Document the functions and clean up implementation, renaming some and adding more.
Add a `find-optimum-size` function that tries every stage2 size to find the one that minimizes the space needed for the tables. Running it shows that the optimum is a stage2 size of 6.
Add `build-case-table` to build the case table and print the results to a file, defaulting to "src/lisp/case-table.c".
- - - - - ac2cda09 by Raymond Toy at 2024-05-22T07:37:09-07:00 Regenerated with the new build-case-table function.
This only adds a comment stating that the file should not be edited and a comment describing how it was generated.
- - - - -
2 changed files:
- src/lisp/case-table.c - src/tools/create-case-table.lisp
Changes:
===================================== src/lisp/case-table.c ===================================== @@ -1,7 +1,14 @@ +/* + * DO NOT EDIT. + * + * This was generated by (BUILD-CASE-TABLE 6) in + * src/tools/create-case-table.c. + */ + #include <stdint.h> #include <stddef.h>
-const uint32_t stage2_1 [64] = { +const uint32_t stage2_1[64] = { 0x00000000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, @@ -14,7 +21,8 @@ const uint32_t stage2_1 [64] = { 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_3 [64] = { + +const uint32_t stage2_3[64] = { 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, @@ -27,7 +35,8 @@ const uint32_t stage2_3 [64] = { 0x00000020, 0x00000000, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x0000FF87 }; -const uint32_t stage2_4 [64] = { + +const uint32_t stage2_4[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -40,7 +49,8 @@ const uint32_t stage2_4 [64] = { 0xFFFF0000, 0x00000001, 0x00000000, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000 }; -const uint32_t stage2_5 [64] = { + +const uint32_t stage2_5[64] = { 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0x00000000, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -53,7 +63,8 @@ const uint32_t stage2_5 [64] = { 0xFFFF0000, 0x00000001, 0x00790000, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0x00000000 }; -const uint32_t stage2_6 [64] = { + +const uint32_t stage2_6[64] = { 0x0000FF3D, 0xFF2E0000, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFF320000, 0xFFFF0000, 
0x00000001, 0xFF330000, 0xFF330000, 0xFFFF0000, 0x00000001, 0x00000000, 0xFFB10000, 0xFF360000, 0xFF350000, 0xFFFF0000, @@ -66,7 +77,8 @@ const uint32_t stage2_6 [64] = { 0x00000001, 0xFF250000, 0xFFFF0000, 0x00000001, 0x00000000, 0x00000000, 0xFFFF0000, 0x00000001, 0x00000000, 0x0000FFC8 }; -const uint32_t stage2_7 [64] = { + +const uint32_t stage2_7[64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFE0000, 0x00000000, 0x00000002, 0xFFFE0000, 0x00000000, 0x00000002, 0xFFFE0000, 0x00000000, 0x00000002, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, @@ -79,7 +91,8 @@ const uint32_t stage2_7 [64] = { 0x00610000, 0x00380000, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_8 [64] = { + +const uint32_t stage2_8[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -92,7 +105,8 @@ const uint32_t stage2_8 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xD5D50000, 0xFFFF0000, 0x00000001, 0x00A30000, 0xD5D80000, 0x0000D5C1 }; -const uint32_t stage2_9 [64] = { + +const uint32_t stage2_9[64] = { 0x0000D5C1, 0xFFFF0000, 0x00000001, 0x00C30000, 0xFFBB0000, 0xFFB90000, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0x0000D5E1, 0x0000D5E4, @@ -105,7 +119,8 @@ const uint32_t stage2_9 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000D619, 0x00000000, 0x00000000 }; -const uint32_t stage2_10 [64] = { + +const uint32_t stage2_10[64] = { 0x000000DA, 0x00000000, 0x00000000, 0x000000DA, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000000DA, 0x00000045, 0x000000D9, 0x000000D9, 0x00000047, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -118,7 +133,8 @@ const uint32_t 
stage2_10 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_13 [64] = { + +const uint32_t stage2_13[64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -131,7 +147,8 @@ const uint32_t stage2_13 [64] = { 0xFFFF0000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x0000FF7E, 0x0000FF7E, 0x0000FF7E, 0x00000000, 0x00000000 }; -const uint32_t stage2_14 [64] = { + +const uint32_t stage2_14[64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFDA0000, 0x00000000, 0xFFDB0000, 0xFFDB0000, 0xFFDB0000, 0x00000000, 0xFFC00000, 0x00000000, 0xFFC10000, 0xFFC10000, 0x00000000, 0xFFE00000, @@ -144,7 +161,8 @@ const uint32_t stage2_14 [64] = { 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020 }; -const uint32_t stage2_15 [64] = { + +const uint32_t stage2_15[64] = { 0x00000020, 0x00000020, 0x00000000, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000040, 0x0000003F, 0x0000003F, 0xFFF80000, 0x00000000, 0x00000000, @@ -157,7 +175,8 @@ const uint32_t stage2_15 [64] = { 0x00000000, 0xFFFF0000, 0x00000001, 0x00070000, 0xFFFF0000, 0x00000001, 0x00000000, 0x00820000, 0x00820000, 0x00820000 }; -const uint32_t stage2_16 [64] = { + +const uint32_t stage2_16[64] = { 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFB00000, 0xFFE00000, 0xFFE00000, @@ -170,7 +189,8 @@ const uint32_t stage2_16 [64] = { 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020 }; -const 
uint32_t stage2_17 [64] = { + +const uint32_t stage2_17[64] = { 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000050, 0x00000050, @@ -183,7 +203,8 @@ const uint32_t stage2_17 [64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_18 [64] = { + +const uint32_t stage2_18[64] = { 0xFFFF0000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -196,7 +217,8 @@ const uint32_t stage2_18 [64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_19 [64] = { + +const uint32_t stage2_19[64] = { 0xFFF10000, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0x0000000F, 0xFFFF0000, 0x00000001, @@ -209,7 +231,8 @@ const uint32_t stage2_19 [64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_20 [64] = { + +const uint32_t stage2_20[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -222,7 +245,8 @@ const uint32_t stage2_20 [64] = { 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000 }; -const uint32_t stage2_21 [64] = { + +const uint32_t stage2_21[64] = { 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 
0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, @@ -235,7 +259,8 @@ const uint32_t stage2_21 [64] = { 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030 }; -const uint32_t stage2_22 [64] = { + +const uint32_t stage2_22[64] = { 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -248,7 +273,8 @@ const uint32_t stage2_22 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_66 [64] = { + +const uint32_t stage2_66[64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -261,7 +287,8 @@ const uint32_t stage2_66 [64] = { 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000 }; -const uint32_t stage2_67 [64] = { + +const uint32_t stage2_67[64] = { 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000, 0xE3A00000, 0x00000000, 0xE3A00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xE3A00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -274,7 +301,8 @@ const uint32_t stage2_67 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_117 [64] = { + +const uint32_t stage2_117[64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -287,7 
+315,8 @@ const uint32_t stage2_117 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x000075FC, 0x00000000, 0x00000000, 0x00000000, 0x0000F11A, 0x00000000, 0x00000000 }; -const uint32_t stage2_120 [64] = { + +const uint32_t stage2_120[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -300,7 +329,8 @@ const uint32_t stage2_120 [64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_121 [64] = { + +const uint32_t stage2_121[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -313,7 +343,8 @@ const uint32_t stage2_121 [64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_122 [64] = { + +const uint32_t stage2_122[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -326,7 +357,8 @@ const uint32_t stage2_122 [64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_123 [64] = { + +const uint32_t stage2_123[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -339,7 +371,8 @@ const uint32_t stage2_123 [64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 
0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_124 [64] = { + +const uint32_t stage2_124[64] = { 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x0000FFF8, 0x0000FFF8, @@ -352,7 +385,8 @@ const uint32_t stage2_124 [64] = { 0x0000FFF8, 0x0000FFF8, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000 }; -const uint32_t stage2_125 [64] = { + +const uint32_t stage2_125[64] = { 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x00000000, 0x00000000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00000000, 0x00000000, 0x00000000, 0x0000FFF8, @@ -365,7 +399,8 @@ const uint32_t stage2_125 [64] = { 0x0000FF9C, 0x0000FF9C, 0x0000FF80, 0x0000FF80, 0x0000FF90, 0x0000FF90, 0x0000FF82, 0x0000FF82, 0x00000000, 0x00000000 }; -const uint32_t stage2_126 [64] = { + +const uint32_t stage2_126[64] = { 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x0000FFF8, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x00080000, 0x0000FFF8, 0x0000FFF8, @@ -378,7 +413,8 @@ const uint32_t stage2_126 [64] = { 0x00000000, 0x00000000, 0x00080000, 0x00080000, 0x004A0000, 0x004A0000, 0x00090000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_127 [64] = { + +const uint32_t stage2_127[64] = { 0x00000000, 0x00000000, 0x00000000, 0x0000FFF7, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00560000, 0x00560000, 0x00560000, 0x00560000, 0x00090000, 0x00000000, 0x00000000, 0x00000000, 0x0000FFF8, 0x0000FFF8, @@ -391,7 +427,8 @@ const uint32_t stage2_127 [64] = { 0x00000000, 0x00000000, 0x00800000, 0x00800000, 0x007E0000, 0x007E0000, 0x00090000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_132 [64] = { + +const uint32_t stage2_132[64] = { 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -404,7 +441,8 @@ const uint32_t stage2_132 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_133 [64] = { + +const uint32_t stage2_133[64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000001C, 0x00000000, 0x00000000, 0x00000000, @@ -417,7 +455,8 @@ const uint32_t stage2_133 [64] = { 0x00000010, 0x00000010, 0x00000010, 0x00000010, 0x00000010, 0x00000010, 0x00000010, 0x00000010, 0x00000010, 0x00000010 }; -const uint32_t stage2_134 [64] = { + +const uint32_t stage2_134[64] = { 0x00000000, 0x00000000, 0x00000000, 0xFFFF0000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -430,7 +469,8 @@ const uint32_t stage2_134 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_146 [64] = { + +const uint32_t stage2_146[64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -443,7 +483,8 @@ const uint32_t stage2_146 [64] = { 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000 }; -const uint32_t stage2_147 [64] = { + +const uint32_t stage2_147[64] = { 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 0xFFE60000, 
0xFFE60000, 0xFFE60000, 0xFFE60000, 0x0000001A, 0x0000001A, @@ -456,7 +497,8 @@ const uint32_t stage2_147 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_176 [64] = { + +const uint32_t stage2_176[64] = { 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, 0xFFD00000, @@ -469,7 +511,8 @@ const uint32_t stage2_176 [64] = { 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030 }; -const uint32_t stage2_177 [64] = { + +const uint32_t stage2_177[64] = { 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, 0x00000030, @@ -482,7 +525,8 @@ const uint32_t stage2_177 [64] = { 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2A3F0000, 0x2A3F0000 }; -const uint32_t stage2_178 [64] = { + +const uint32_t stage2_178[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -495,7 +539,8 @@ const uint32_t stage2_178 [64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_179 [64] = { + +const uint32_t stage2_179[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -508,7 +553,8 @@ const uint32_t stage2_179 [64] = { 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_180 [64] = { + +const uint32_t stage2_180[64] = { 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, 0x00001C60, @@ -521,7 +567,8 @@ const uint32_t stage2_180 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_665 [64] = { + +const uint32_t stage2_665[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -534,7 +581,8 @@ const uint32_t stage2_665 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_666 [64] = { + +const uint32_t stage2_666[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -547,7 +595,8 @@ const uint32_t stage2_666 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_668 [64] = { + +const uint32_t stage2_668[64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -560,7 +609,8 @@ const uint32_t stage2_668 [64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001 }; -const uint32_t 
stage2_669 [64] = { + +const uint32_t stage2_669[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, @@ -573,7 +623,8 @@ const uint32_t stage2_669 [64] = { 0x00000000, 0x00000000, 0x00000000, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0x8A040000, 0xFFFF0000, 0x00000001 }; -const uint32_t stage2_670 [64] = { + +const uint32_t stage2_670[64] = { 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0xFFFF0000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0000, 0x00000001, 0xA5280000, 0x00000000, 0x00000000, 0xFFFF0000, 0x00000001, @@ -586,7 +637,8 @@ const uint32_t stage2_670 [64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_1020 [64] = { + +const uint32_t stage2_1020[64] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -599,7 +651,8 @@ const uint32_t stage2_1020 [64] = { 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0xFFE00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -const uint32_t stage2_1021 [64] = { + +const uint32_t stage2_1021[64] = { 0x00000000, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020, 0x00000020,
===================================== src/tools/create-case-table.lisp ===================================== @@ -1,44 +1,97 @@ -(defun build-case-table (&optional (stage2-size 6)) +;; Creates a table of tables that maps a lower case letter to an upper +;; case letter or an upper case letter to a lower case letter. This +;; mapping only works if the roundtrip casing returns the original +;; character, as required by CLHS. +;; +;; STAGE2-SIZE is the number of bits to used for the index of the +;; second stage table. +;; +;; Let C be a 16-bit character code. C is decomposed into two parts. +;; The high bits are used as the index into the first table, and the +;; low bits are used as the index into the second table. The number +;; of low bits is STAGE2-SIZE. +;; +;; If the second stage table is all zeroes, the table is replaced by +;; NIL since it contains no valid mapping of lower or upper case +;; letters. +;; +;; Each element of this table is 32-bits long. The low 16 bits +;; contains the mapping of C to the corresponding upper case letter. +;; The high 16 bits maps C to the corresponding lower case letter. +(defun compute-case-table (stage2-size) (let ((table (make-array (ash 1 (- 16 stage2-size))))) (dotimes (i (length table)) (setf (aref table i) (make-array (ash 1 stage2-size) :initial-element 0))) - (dotimes (i #xFFFF) - ;; Compute mapping from lower case to upper case. The offset is - ;; stored in the low 16 bits of the stage2 table. - (when (and (/= i (lisp::unicode-upper i)) - (= i (lisp::unicode-lower (lisp::unicode-upper i)))) - (let ((stage1 (ldb (byte (- 16 stage2-size) stage2-size) i)) - (stage2 (ldb (byte stage2-size 0) i)) - (delta (ldb (byte 16 0) (- i (lisp::unicode-upper i))))) + (dotimes (i char-code-limit) + (let ((stage1 (ldb (byte (- 16 stage2-size) stage2-size) i)) + (stage2 (ldb (byte stage2-size 0) i))) + ;; Compute mapping from lower case to upper case. The offset + ;; is stored in the low 16 bits of the stage2 table. 
+ ;; + ;; Only consider characters that have an upper case letter and + ;; whose lowercase version returns the original letter. + (when (and (/= i (lisp::unicode-upper i)) + (= i (lisp::unicode-lower (lisp::unicode-upper i)))) + (let ((delta (ldb (byte 16 0) (- i (lisp::unicode-upper i))))) (setf (aref (aref table stage1) stage2) delta))) - ;; Compute mapping from upper case to lower case. The offset is - ;; stored in the high 16 bits ofthe stage2 table. - (when (and (/= i (lisp::unicode-lower i)) - (= i (lisp::unicode-upper (lisp::unicode-lower i)))) - (let ((stage1 (ldb (byte (- 16 stage2-size) stage2-size) i)) - (stage2 (ldb (byte stage2-size 0) i)) - (delta (ldb (byte 16 0) (- i (lisp::unicode-lower i))))) - (setf (aref (aref table stage1) stage2) (ash delta 16))))) - (let ((empty (count-if #'(lambda (x) (every #'zerop x)) table))) - (format t "~D non-empty ~D empty~%" (- (length table) empty) empty) - (dotimes (k (length table)) - (let ((empty (count-if-not #'zerop (aref table k)))) - (when (zerop empty) - (setf (aref table k) nil)) - #+nil - (format t "~3D: ~D: ~A~%" k empty (aref table k)))) - (let ((stage2 (loop for v across table - when v - sum (length v)))) - (format t "stage1 entries: ~D~%" (length table)) - (format t "stage2 entries: ~D (length ~D)~%" - stage2 (ash 1 stage2-size)) - (format t "total : ~D~%" (+ (length table) stage2))) - table))) + ;; Compute mapping from upper case to lower case. The offset + ;; is stored in the high 16 bits ofthe stage2 table. + ;; + ;; Only consider characters that have a lower case letter and + ;; whose upper case version returns the original letter. + (when (and (/= i (lisp::unicode-lower i)) + (= i (lisp::unicode-upper (lisp::unicode-lower i)))) + (let ((delta (ldb (byte 16 0) (- i (lisp::unicode-lower i))))) + (setf (aref (aref table stage1) stage2) (ash delta 16)))))) + ;; Find each stage2 table that is all zeroes and replace it with + ;; NIL. 
+ (dotimes (k (length table)) + (let ((empty (count-if-not #'zerop (aref table k)))) + (when (zerop empty) + (setf (aref table k) nil)) + #+nil + (format t "~3D: ~D: ~A~%" k empty (aref table k)))) + table))
+;; Given a case-mapping table TABLE, print some information about the +;; size of the tables. This includes the number of empty and +;; non-empty stage2 tables. Also print out how many total non-NIL +;; entries are needed. This is proportional to the total amount of +;; memory needed to store all the tables. +(defun print-table-stats (table stage2-size) + (let ((stage1-size (length table)) + (stage2 (loop for v across table + when v + sum (length v))) + (empty (count-if #'null table))) + (format t "stage2-size ~D~%" stage2-size) + (format t " stage1 entries: ~D: " stage1-size) + (format t " ~D non-empty ~D empty~%" (- stage1-size empty) empty) + (format t " stage2 entries: ~D (length ~D)~%" + stage2 (ash 1 stage2-size)) + (format t " total : ~D~%" (+ (length table) stage2)) + (+ (length table) stage2))) + +(defun find-optimum-size () + (let ((results + (first + (sort (loop for stage2-size from 1 to 15 + collect (list stage2-size + (print-table-stats + (compute-case-table stage2-size) + stage2-size))) + #'< + :key #'second)))) + (format t "Optimum table size: stage2-size ~D, space ~D~%" + (first results) + (second results)))) + +;; Neatly print the K'th stage2 table TABLE to STREAM. Each table is +;; named "stage2_k". (defun print-table (k table stream) - (format stream "const uint32_t stage2_~D [~D] = {~%" k (length table)) + (format stream "~%const uint32_t stage2_~D[~D] = {~%" k (length table)) + ;; Neatly wrap the entries. (pprint-logical-block (stream nil :prefix " ") (dotimes (n (length table)) (unless (zerop n) @@ -49,10 +102,20 @@ (format stream "0x~8,'0x" (aref table n)))) (format stream "~%};~%"))
-(defun dump-case-table (pathname table) +;; Print the case table TABLE to a file named by PATHNAME. +(defun dump-case-table (pathname table stage2-size) (with-open-file (stream pathname :direction :output :if-exists :supersede) + (format stream + "~ +/* + * DO NOT EDIT. + * + * This was generated by (BUILD-CASE-TABLE ~D) in + * src/tools/create-case-table.c. + */~2%" +stage2-size) (format stream "#include <stdint.h>~%") - (format stream "#include <stddef.h>~2%") + (format stream "#include <stddef.h>~%") ;; First dump each stage2 table (loop for k from 0 for s2 across table @@ -68,4 +131,9 @@ do (format stream " &stage2_~D,~%" k) else do (format stream " NULL,~%")) - (format stream "};~%"))) + (format stream "};~%") + (format t "Wrote ~S~%" (namestring stream)))) + +(defun build-case-table (&key (stage2-size 6) (pathname "./src/lisp/case-table.c")) + (let ((table (compute-case-table stage2-size))) + (dump-case-table pathname table stage2-size)))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/6d25998929133a5164de1ea...