Raymond Toy pushed to branch issue-322-optimize-case-mapping-size at cmucl / cmucl

Commits:

1 changed file:

Changes:

  • src/tools/create-case-table.lisp deleted
    1
    -;; Creates a table of tables that maps a lower case letter to an upper
    
    2
    -;; case letter or an upper case letter to a lower case letter.  This
    
    3
    -;; mapping only works if the roundtrip casing returns the original
    
    4
    -;; character, as required by the standard.
    
    5
    -;;
    
    6
    -;; STAGE2-SIZE is the number of bits to used for the index of the
    
    7
    -;; second stage table.
    
    8
    -;;
    
    9
    -;; Let C be a 16-bit character code.  C is decomposed into two parts.
    
    10
    -;; The high bits are used as the index into the first table, and the
    
    11
    -;; low bits are used as the index into the second table.  The number
    
    12
    -;; of low bits is STAGE2-SIZE.
    
    13
    -;;
    
    14
    -;; If the second stage table is all zeroes, the table is replaced by
    
    15
    -;; NIL since it contains no valid mapping of lower or upper case
    
    16
    -;; letters.
    
    17
    -;;
    
    18
    -;; Each element of this table is 32-bits long.  The low 16 bits
    
    19
    -;; contains the mapping of C to the corresponding upper case letter.
    
    20
    -;; The high 16 bits maps C to the corresponding lower case letter.
    
    21
    -(defun compute-case-table (stage2-size)
    
    22
    -  (let ((table (make-array (ash 1 (- 16 stage2-size)))))
    
    23
    -    (dotimes (i (length table))
    
    24
    -      (setf (aref table i) (make-array (ash 1 stage2-size)
    
    25
    -                                       :initial-element 0
    
    26
    -                                       :element-type '(unsigned-byte 32))))
    
    27
    -    (dotimes (i char-code-limit)
    
    28
    -      (let ((stage1 (ldb (byte (- 16 stage2-size) stage2-size) i))
    
    29
    -	    (stage2 (ldb (byte stage2-size 0) i)))
    
    30
    -        (let ((upper (lisp::unicode-upper i))
    
    31
    -              (lower (lisp::unicode-lower i))
    
    32
    -              (entry 0))
    
    33
    -          (declare (type (unsigned-byte 32) entry))
    
    34
    -
    
    35
    -          (assert (< upper char-code-limit))
    
    36
    -          (assert (< lower char-code-limit))
    
    37
    -          
    
    38
    -          ;; Compute mapping from lower case to upper case which is
    
    39
    -          ;; stored in the low 16 bits of the stage2 table.
    
    40
    -          ;;
    
    41
    -          ;; Only consider characters that have an upper case letter and
    
    42
    -          ;; whose lowercase version returns the original letter.
    
    43
    -          (when (and (/= i upper)
    
    44
    -		     (= i (lisp::unicode-lower upper)))
    
    45
    -	    (setf entry (ldb (byte 16 0) (- i upper))))
    
    46
    -          ;; Compute mapping from upper case to lower case which is
    
    47
    -          ;; stored in the high 16 bits ofthe stage2 table.
    
    48
    -          ;;
    
    49
    -          ;; Only consider characters that have a lower case letter and
    
    50
    -          ;; whose upper case version returns the original letter.
    
    51
    -          (when (and (/= i lower)
    
    52
    -		     (= i (lisp::unicode-upper lower)))
    
    53
    -            (setf entry (ash (ldb (byte 16 0) (- i lower))
    
    54
    -                             16)))
    
    55
    -
    
    56
    -          ;; Note: the entry can only contain a lower case code or an
    
    57
    -          ;; upper case code, not both because we a character is
    
    58
    -          ;; either lower case or upper case and not both at the same
    
    59
    -          ;; time.
    
    60
    -	  (setf (aref (aref table stage1) stage2)
    
    61
    -                entry))))
    
    62
    -
    
    63
    -    ;; Find each stage2 table that is all zeroes and replace it with
    
    64
    -    ;; NIL.
    
    65
    -    (dotimes (k (length table))
    
    66
    -      (let ((empty (count-if-not #'zerop (aref table k))))
    
    67
    -        (when (zerop empty)
    
    68
    -          (setf (aref table k) nil))))
    
    69
    -    table))
    
    70
    -
    
    71
    -;; Given a case-mapping table TABLE, print some information about the
    
    72
    -;; size of the tables.  This includes the number of empty and
    
    73
    -;; non-empty stage2 tables.  Also print out how many total non-NIL
    
    74
    -;; entries are needed.  This is proportional to the total amount of
    
    75
    -;; memory needed to store all the tables.
    
    76
    -(defun print-table-stats (table stage2-size)
    
    77
    -  (let ((stage1-size (length table))
    
    78
    -        (stage2 (loop for v across table
    
    79
    -                      when v
    
    80
    -                        sum (length v)))
    
    81
    -        (empty (count-if #'null table)))
    
    82
    -    (format t "stage2-size ~D~%" stage2-size)
    
    83
    -    (format t "  stage1 entries:  ~D: " stage1-size)
    
    84
    -    (format t "  ~D non-empty ~D empty~%" (- stage1-size empty) empty)
    
    85
    -    (format t "  stage2 entries:  ~D (length ~D)~%"
    
    86
    -            stage2 (ash 1 stage2-size))
    
    87
    -    (format t "  total         :  ~D~%" (+ (length table) stage2))
    
    88
    -    (+ (length table) stage2)))
    
    89
    -
    
    90
    -(defun find-optimum-size ()
    
    91
    -  (let ((results
    
    92
    -          (first
    
    93
    -           (sort (loop for stage2-size from 1 to 15
    
    94
    -                       collect (list stage2-size
    
    95
    -                                     (print-table-stats
    
    96
    -                                      (compute-case-table stage2-size)
    
    97
    -                                      stage2-size)))
    
    98
    -                 #'<
    
    99
    -                 :key #'second))))
    
    100
    -    (format t "Optimum table size:  stage2-size ~D, space ~D~%"
    
    101
    -            (first results)
    
    102
    -            (second results))))
    
    103
    -
    
    104
    -;; Print the case table TABLE to a file named by PATHNAME.
    
    105
    -(defun dump-case-table (pathname table stage2-size)
    
    106
    -  ;; The first entry in the table MUST be NIL because we use that as
    
    107
    -  ;; the all-zeroes array because of the sparse entries in the table.
    
    108
    -  (assert (null (aref table 0)))
    
    109
    -
    
    110
    -  (with-open-file (stream pathname :direction :output :if-exists :supersede)
    
    111
    -    (format stream 
    
    112
    -            "~
    
    113
    -/*
    
    114
    - * DO NOT EDIT.
    
    115
    - *
    
    116
    - * This was generated by (BUILD-CASE-TABLE :STAGE2-SIZE ~D) in
    
    117
    - * src/tools/create-case-table.c.
    
    118
    - */~2%"
    
    119
    -	    stage2-size)
    
    120
    -    (format stream "#include <stdint.h>~%")
    
    121
    -    (format stream "#include <stddef.h>~%")
    
    122
    -    (format stream "~2%const uint32_t stage2[] = {~%")
    
    123
    -    (flet ((print-table (header table stream)
    
    124
    -             ;; Neatly print the table TABLE to STREAM.  Each table is
    
    125
    -             ;; preceded by a C comment in HEADER.  The entries are
    
    126
    -             ;; printed in hex, and neatly wrapped.
    
    127
    -	     (format stream "/* ~A */" header)
    
    128
    -	     (pprint-newline :mandatory stream)
    
    129
    -	     (dotimes (n (length table))
    
    130
    -	       (unless (zerop n)
    
    131
    -		 (write-char #\, stream)
    
    132
    -		 (write-char #\space stream)
    
    133
    -		 (pprint-newline :fill stream))
    
    134
    -	       ;;(pprint-pop)
    
    135
    -	       (format stream "0x~8,'0x" (aref table n)))
    
    136
    -	     (princ #\, stream)
    
    137
    -	     (pprint-newline :mandatory stream)))
    
    138
    -      (let ((index 0)
    
    139
    -	    offsets)
    
    140
    -	(pprint-logical-block (stream nil :prefix "    ")
    
    141
    -	  (print-table "zeroes"
    
    142
    -		       (make-array (ash 1 stage2-size)
    
    143
    -				   :initial-element 0)
    
    144
    -		       stream)
    
    145
    -	  (loop for k from 0
    
    146
    -		for s2 across table
    
    147
    -		when s2
    
    148
    -		  do (progn
    
    149
    -		       (incf index (ash 1 stage2-size))
    
    150
    -		       (push index offsets)
    
    151
    -		       (print-table (format nil "stage2_~D (offset ~D)" k index)
    
    152
    -				    s2
    
    153
    -				    stream))))
    
    154
    -	(format stream "};~%")
    
    155
    -    
    
    156
    -	;; Now dump the stage1 table
    
    157
    -	(format stream "~2%const uint16_t case_mapping[~D] = {~%"
    
    158
    -		(length table))
    
    159
    -	(setf offsets (nreverse offsets))
    
    160
    -	(loop for s2 across table
    
    161
    -              for k from 0
    
    162
    -	      if s2
    
    163
    -		do (format stream "   0x~4,'0x, /* stage2_~D */~%"
    
    164
    -			   (pop offsets)
    
    165
    -			   k)
    
    166
    -	      else
    
    167
    -		do (format stream "   0x~4,'0x,~%"
    
    168
    -			     0))
    
    169
    -	(format stream "};~%")
    
    170
    -	(format t "Wrote ~S~%" (namestring stream))))))
    
    171
    -
    
    172
    -(defun build-case-table (&key (stage2-size 6) (pathname "./src/lisp/case-mapping.c"))
    
    173
    -  (let ((table (compute-case-table stage2-size)))
    
    174
    -    (dump-case-table pathname table stage2-size)))