Date: Tuesday, February 22, 2011 @ 22:02:34 Author: rtoy Path: /project/cmucl/cvsroot/src/code
Modified: unidata.lisp
Fix bug where cmucl was no longer recognizing things like #\latin_small_letter_a. This failure is caused by the new SEARCH-DICTIONARY function that does partial completion, and UNICODE-NAME-TO-CODEPOINT function wan't aware of the new way.
We could change UNICODE-NAME-TO-CODEPOINT to do the appropriate thing with the new way, but I (rtoy) decided it would be nice to have the old function around too. Hence, restore the old version and use it.
--------------+ unidata.lisp | 61 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 12 deletions(-)
Index: src/code/unidata.lisp diff -u src/code/unidata.lisp:1.23 src/code/unidata.lisp:1.24 --- src/code/unidata.lisp:1.23 Wed Sep 29 16:51:19 2010 +++ src/code/unidata.lisp Tue Feb 22 22:02:33 2011 @@ -4,7 +4,7 @@ ;;; This code was written by Paul Foley and has been placed in the public ;;; domain. ;;; -(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.23 2010-09-29 20:51:19 rtoy Exp $") +(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.24 2011-02-23 03:02:33 rtoy Exp $") ;;; ;;; ********************************************************************** ;;; @@ -18,7 +18,7 @@
(defconstant +unidata-path+ #p"ext-formats:unidata.bin")
-(defvar *unidata-version* "$Revision: 1.23 $") +(defvar *unidata-version* "$Revision: 1.24 $")
(defstruct unidata range @@ -339,6 +339,45 @@ i)) stack))))))))
+;; Like SEARCH-DICTIONARY, but we don't try to do partial matches. We +;; do an exact match on the given string. +(defun exact-match-dictionary (string dictionary) + (declare (optimize (speed 3) (space 0) (safety 0) + (ext:inhibit-warnings 3)) + (type string string) (type dictionary dictionary)) + (let* ((codebook (dictionary-cdbk dictionary)) + (current 0) + (posn 0) + (stack '())) + (declare (type (unsigned-byte 32) current) (type lisp::index posn)) + (loop + (let ((keyv (ash (aref (dictionary-nextv dictionary) current) -18))) + (dotimes (i (aref (dictionary-keyl dictionary) keyv) + (if stack + (let ((next (pop stack))) + (setq posn (car next) current (cdr next))) + (return-from exact-match-dictionary nil))) + (let* ((str (aref codebook (aref (dictionary-keyv dictionary) + (+ keyv i)))) + (len (length str))) + (declare (type simple-base-string str)) + (when (and (>= (length string) (+ posn len)) + (string= string str :start1 posn :end1 (+ posn len))) + (setq current + (+ (logand (aref (dictionary-nextv dictionary) current) + #x3FFFF) + i)) + (when (= (incf posn len) (length string)) + (return-from exact-match-dictionary current)) + (return)) ; from DOTIMES - loop again + (when (or (string= str " ") (string= str "-")) + (push (cons posn + (+ (logand (aref (dictionary-nextv dictionary) + current) + #x3FFFF) + i)) + stack)))))))) + (defun search-range (code range) (declare (optimize (speed 3) (space 0) (safety 0)) (type codepoint code) (type range range)) @@ -727,20 +766,18 @@ nil))) (t (unless (unidata-name+ *unicode-data*) (load-names)) - (let* ((names (unidata-name+ *unicode-data*))) - (multiple-value-bind (n p) - (search-dictionary name names) - (when (and n (= p (length name))) - (let ((cp (aref (dictionary-codev names) n))) - (if (minusp cp) nil cp)))))))) + (let* ((names (unidata-name+ *unicode-data*)) + (n (exact-match-dictionary name names))) + (when n + (let ((cp (aref (dictionary-codev names) n))) + (if (minusp cp) nil cp)))))))
(defun unicode-1.0-name-to-codepoint (name) (declare (type string name)) (unless (unidata-name1+ *unicode-data*) (load-1.0-names)) - (let* ((names (unidata-name1+ *unicode-data*))) - (multiple-value-bind (n p) - (search-dictionary name names) - (when (and n (= p (length name))) + (let* ((names (unidata-name1+ *unicode-data*)) + (n (exact-match-dictionary name names))) + (when n (let ((cp (aref (dictionary-codev names) n))) (if (minusp cp) nil cp))))))