Date: Sunday, September 19, 2010 @ 19:07:46 Author: rtoy Path: /project/cmucl/cvsroot/src/code
Modified: unidata.lisp
o Move %STR, %STRX and %MATCH around so that we can inline them (because they're so simple). o Add some comments for %STR. o Change implementation of %MATCH to be simpler and add comments on why we do what we do and explain what happens if we don't. o Handle completion of Hangul syllables better: - Match "Hangul_S" instead of "Hangul_Syllable" because there's #\Hangul_Single_Dot_Tone_Mark. - If we match "Hangul_S", try to complete some Hangul syllables so we don't fool slime into thinking "Hangul_Syllable_" is the only completion. There are obviously more. o Handle completion of CJK Unified Ideographs better by trying to complete more so slime isn't fooled into thinking "CJK_Unified_Ideograph-" is the only possible completion.
--------------+ unidata.lisp | 91 +++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 63 insertions(+), 28 deletions(-)
Index: src/code/unidata.lisp diff -u src/code/unidata.lisp:1.18 src/code/unidata.lisp:1.19 --- src/code/unidata.lisp:1.18 Sat Sep 18 22:37:10 2010 +++ src/code/unidata.lisp Sun Sep 19 19:07:46 2010 @@ -4,7 +4,7 @@ ;;; This code was written by Paul Foley and has been placed in the public ;;; domain. ;;; -(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.18 2010-09-19 02:37:10 rtoy Exp $") +(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.19 2010-09-19 23:07:46 rtoy Exp $") ;;; ;;; ********************************************************************** ;;; @@ -18,7 +18,7 @@
(defconstant +unidata-path+ #p"ext-formats:unidata.bin")
-(defvar *unidata-version* "$Revision: 1.18 $") +(defvar *unidata-version* "$Revision: 1.19 $")
(defstruct unidata range @@ -1132,7 +1132,43 @@ (defvar *cjk-unified-ideograph-dictionary* nil "Dictionary of CJK Unified ideographs")
-;; +;; Convert the string into the form we want for character names. +;; Basically the Unicode name has spaces replaced by underscores, and +;; the result is capitalized. +(declaim (inline %str %strx)) +(defun %str (x) + (nsubstitute #_ #\Space (string-capitalize x))) + +(defun %strx (x) + (%str (car x))) + +(declaim (inline %match)) +#+(or) +(defun %match (part prefix posn) + (and (>= (length part) (- (length prefix) posn)) + (string= part prefix :start2 posn :end1 (- (length prefix) posn)))) + +#+(or) +(defun %match (part prefix posn) + (let ((s1 (search part prefix :start2 posn)) + (s2 (search prefix part :start1 posn))) + (or (and s1 (= s1 posn)) + (and s2 (zerop s2))))) + +;; Test if the string PART matches the string PREFIX starting from +;; position POSN. Basically test that the initial parts of the +;; strings match each other exactly. For if the prefix is "BO", then +;; both "B" and "BOX" should match. (This is needed to get the +;; completion of "cjk_radical_bo" to match "cjk_radical_box" as well +;; as "cjk_radical_bone" and others because at one point in the +;; algorithm the part is "B", which we do want to match "BO" so that +;; we can get the possible completions BONE" and "BOLT OF CLOTH". +(defun %match (part prefix posn) + (let ((len (min (length part) + (- (length prefix) posn)))) + (string= part prefix :end1 len :start2 posn :end2 (+ posn len)))) + + (defun unicode-complete-name (prefix &optional (dict (unidata-name+ *unicode-data*))) @@ -1270,36 +1306,35 @@ (when completep (push prefix-match names)) ;; Match prefix against Hangul and/or Hangul_syllable - (cond ((or (string= prefix-match "Hangul_") - (search "Hangul_Syllable_" prefix-match :end1 (min 16 (length prefix-match)))) - ;; Add syllable as possible completion - (push "Hangul_Syllable_" names)) + (cond ((search "Hangul_S" prefix-match + :end1 (min 8 (length prefix-match))) + ;; Add syllable as possible completion, and then try to + ;; complete some more so that we don't end up with slime + ;; saying "Hangul_Syllable_" is the only completion. + (multiple-value-bind (m suffixes) + (unicode-complete-name (subseq prefix-match (min 16 (length prefix-match))) + *hangul-syllable-dictionary*) + (declare (ignore m)) + (if suffixes + (loop for n in suffixes + do (push (concatenate 'string "Hangul_Syllable_" n) names)) + (push "Hangul_Syllable_" names)))) ((or ;;(string= prefix-match "Cjk_") - (search "Cjk_Unified_Ideograph-" prefix-match :end1 (min 22 (length prefix-match)))) - ;; Add Unified - (push "Cjk_Unified_Ideograph-" names))) + (search "Cjk_Unified_Ideograph-" prefix-match + :end1 (min 22 (length prefix-match)))) + ;; Try to complete the first part so we don't get + ;; "Cjk_Unified_Ideograph-" as the only completion. + (multiple-value-bind (m suffixes) + (unicode-complete-name (subseq prefix-match (min 22 (length prefix-match))) + *cjk-unified-ideograph-dictionary*) + (declare (ignore m)) + (loop for n in suffixes + do (push (concatenate 'string "Cjk_Unified_Ideograph-" n) names))))) (setf names (mapcar #'string-capitalize names)) ;;(format t "Final names = ~S~%" names) names)))
-;; Convert the string into the form we want for character names. -(defun %str (x) - (nsubstitute #_ #\Space (string-capitalize x))) - -(defun %strx (x) - (%str (car x))) - -#+(or) -(defun %match (part prefix posn) - (and (>= (length part) (- (length prefix) posn)) - (string= part prefix :start2 posn :end1 (- (length prefix) posn)))) - -(defun %match (part prefix posn) - (let ((s1 (search part prefix :start2 posn)) - (s2 (search prefix part :start1 posn))) - (or (and s1 (= s1 posn)) - (and s2 (zerop s2))))) - +;; Find the longest initial substring of the STRINGS. (defun %mip (strings) (let* ((first (first strings)) (posn (length first)))