Raymond Toy pushed to branch issue-511-update-unicode-tests at cmucl / cmucl

Commits:

3 changed files:

Changes:

  • src/code/unidata.lisp
    ... ... @@ -715,9 +715,9 @@
    715 715
     
    
    716 716
     (defloader load-word-break (stm 18)
    
    717 717
       (multiple-value-bind (split hvec mvec lvec)
    
    718
    -      (read-ntrie 4 stm)
    
    718
    +      (read-ntrie 8 stm)
    
    719 719
         (setf (unidata-word-break *unicode-data*)
    
    720
    -	  (make-ntrie4 :split split :hvec hvec :mvec mvec :lvec lvec))))
    
    720
    +	  (make-ntrie8 :split split :hvec hvec :mvec mvec :lvec lvec))))
    
    721 721
     
    
    722 722
     ;;; Accessor functions.
    
    723 723
     
    
    ... ... @@ -1193,7 +1193,7 @@
    1193 1193
       (unless (unidata-word-break *unicode-data*)
    
    1194 1194
         (load-word-break))
    
    1195 1195
       (let* ((data (unidata-word-break *unicode-data*))
    
    1196
    -	 (n (qref4 data code)))
    
    1196
    +	 (n (qref8 data code)))
    
    1197 1197
         n))
    
    1198 1198
     
    
    1199 1199
     (defun unicode-word-break (code)
    
    ... ... @@ -1201,7 +1201,9 @@
    1201 1201
       ;; pack-word-break in tools/build-unidata.lisp!
    
    1202 1202
       (aref #(:other :cr :lf :newline :extend :format
    
    1203 1203
     	  :katakana :aletter :midnumlet :midletter :midnum
    
    1204
    -	  :numeric :extendnumlet :regional_indicator)
    
    1204
    +	  :numeric :extendnumlet :regional_indicator
    
    1205
    +	  :hebrew_letter :single_quote :double_quote
    
    1206
    +	  :zwj :wsegspace)
    
    1205 1207
     	(unicode-word-break-code code)))
    
    1206 1208
     
    
    1207 1209
     ;; Support for character name completion for slime.
    

  • src/i18n/unidata.bin
    No preview for this file type
  • src/tools/build-unidata.lisp
    ... ... @@ -637,7 +637,7 @@
    637 637
     	;; 18. Word-break
    
    638 638
     	(let ((data (unidata-word-break *unicode-data*)))
    
    639 639
     	  (update-index (file-position stm) index)
    
    640
    -	  (write-ntrie4 data stm))
    
    640
    +	  (write-ntrie8 data stm))
    
    641 641
     	;; All components saved. Patch up index table now.
    
    642 642
     	(file-position stm 8)
    
    643 643
     	(dotimes (i (length index))
    
    ... ... @@ -1011,7 +1011,13 @@
    1011 1011
       (or (position (ucdent-word-break ucdent)
    
    1012 1012
     		'(:other :cr :lf :newline :extend :format
    
    1013 1013
     		  :katakana :aletter :midnumlet :midletter :midnum
    
    1014
    -		  :numeric :extendnumlet :regional_indicator))
    
    1014
    +		  :numeric :extendnumlet :regional_indicator
    
    1015
    +		  ;; Classes added since Unicode 6.2 (6.3: hebrew_letter,
    
    1016
    +		  ;; single_quote, double_quote; 9.0: zwj; 11.0: wsegspace).
    
    1017
    +		  ;; Appended so existing indices are preserved; the array in
    
    1018
    +		  ;; unicode-word-break MUST match this order.
    
    1019
    +		  :hebrew_letter :single_quote :double_quote
    
    1020
    +		  :zwj :wsegspace))
    
    1015 1021
           0))
    
    1016 1022
     
    
    1017 1023
     ;; ucd-directory should be the directory where UnicodeData.txt is
    
    ... ... @@ -1213,7 +1219,7 @@
    1213 1219
         (let ((split #x66))
    
    1214 1220
           (multiple-value-bind (hvec mvec lvec)
    
    1215 1221
     	  (pack ucd range (lambda (x) (pack-word-break x))
    
    1216
    -		0 4 split)
    
    1222
    +		0 8 split)
    
    1217 1223
     	(setf (unidata-word-break *unicode-data*)
    
    1218
    -	      (make-ntrie4 :split split :hvec hvec :mvec mvec :lvec lvec))))
    
    1224
    +	      (make-ntrie8 :split split :hvec hvec :mvec mvec :lvec lvec))))
    
    1219 1225
         nil))