;;; Character completion ;; This is CMUCL's completion code. If CMUCL already contains the ;; necessary code, don't try to load this. (in-package "LISP") #+(and unicode #.(cl:if (cl:find-symbol "UNICODE-COMPLETE" "LISP") '(or) '(and))) (ext:without-package-locks (defvar *hangul-syllable-dictionary* nil "Dictionary of Hangul syllables") (defvar *cjk-unified-ideograph-dictionary* nil "Dictionary of CJK Unified ideographs") (defun search-dictionary (string dictionary &optional (current 0) (posn 0)) "Search the Unicode name dictionary for the longest entry that matches STRING. STRING must be in Unicode name format. That is, it must be upper case with spaces separating each word. Two values are returned. The first value is index into the codebook that continues the string.. The second value is the length of the substring of string that matches the codebook. " (declare (optimize (speed 3) (space 0) (safety 0) (ext:inhibit-warnings 3)) (type string string) (type dictionary dictionary) (type (unsigned-byte 32) current) (type lisp::index posn)) (let* ((codebook (dictionary-cdbk dictionary)) (stack '())) (loop (let ((keyv (ash (aref (dictionary-nextv dictionary) current) -18))) (dotimes (i (aref (dictionary-keyl dictionary) keyv) (if stack (let ((next (pop stack))) (setq posn (car next) current (cdr next))) (return-from search-dictionary nil))) (let* ((str (aref codebook (aref (dictionary-keyv dictionary) (+ keyv i)))) (len (length str))) (declare (type simple-base-string str)) (cond ((and (>= (length string) (+ posn len)) (string= string str :start1 posn :end1 (+ posn len))) (setq current (+ (logand (aref (dictionary-nextv dictionary) current) #x3FFFF) i)) (when (= (incf posn len) (length string)) (return-from search-dictionary (values current posn))) (return)) ; from DOTIMES - loop again ((and (< (length string) (+ posn len)) (string= string str :start1 posn :end2 (- (length string) posn))) (return-from search-dictionary (values current posn)))) (when (or (string= str " ") (string= str "-")) (push (cons posn (+ (logand (aref (dictionary-nextv dictionary) current) #x3FFFF) i)) stack)))))))) (defun hangul-syllable-p (code) ;; Search src/i18n/UnicodeData.txt for "Hangule Syllable" to find ;; the values here. (<= #xAC00 code #xD7A3)) (defun cjk-ideograph-p (code) ;; Search src/i18n/UnicodeData.txt for "CJK Ideograph" to find the ;; values here. (or (<= #x3400 code #x4DB5) ; CJK Ideograph Extension A (<= #x4E00 code #x9FCB) ; CJK Ideograph (<= #x20000 code #x2A6D6) ; CJK Ideograph Extension B (<= #X2A700 code #X2B734))) (defun initialize-reverse-hangul-tables () (unless (boundp '*reverse-hangul-choseong*) (setq *reverse-hangul-choseong* (sort (coerce (loop for x across +hangul-choseong+ as i upfrom 0 by 588 collect (cons x i)) 'vector) #'> :key (lambda (x) (length (car x))))) (setq *reverse-hangul-jungseong* (sort (coerce (loop for x across +hangul-jungseong+ as i upfrom 0 by 28 collect (cons x i)) 'vector) #'> :key (lambda (x) (length (car x))))) (setq *reverse-hangul-jongseong* (sort (coerce (loop for x across +hangul-jongseong+ as i upfrom 1 collect (cons x i)) 'vector) #'> :key (lambda (x) (length (car x))))))) (defun unicode-complete-name (prefix &optional (dict (unidata-name+ *unicode-data*))) "Try to complete the string Prefix using the dictionary in Dict. Three values are returned: (1) The best match of prefix, (2) a list of possible completions, (3) a boolean indicating whether the best match is a complete unicode name. " (unless dict ;; Load the names dictionary, if needed. (unless (unidata-name+ *unicode-data*) (load-names)) (setf dict (unidata-name+ *unicode-data*))) (let ((prefix (nsubstitute #\Space #\_ (string-upcase prefix))) completep) (multiple-value-bind (n p) (search-dictionary prefix dict) (when n (setq completep (> (aref (dictionary-codev dict) n) -1))) #+(or debug-uc) (format t "n,p,complete = ~S ~S ~S~%" n p completep) (cond ((not p) (values (%str prefix) nil nil)) ((= p (length prefix)) ;; The prefix is an exact match to something in the code ;; book. Try to find possible completions of this ;; prefix. (let ((x (node-next n dict)) (suffix "")) #+(or debug-uc) (format t "init x = ~S~%" x) (when (= (length x) 1) ;; There was only one possible extension. Try to ;; extend from there. #+(or debug-uc) (format t "extending~%") (setq suffix (caar x) n (cdar x) x (node-next (cdar x) dict))) #+(or debug-uc) (progn (format t "x = ~S~%" x) (format t "suffix = ~S~%" suffix)) (when (<= (length x) 1) (setq prefix (concatenate 'string prefix suffix)) (setf suffix "")) (values (%str prefix) (sort (mapcar #'(lambda (e) (%str (concatenate 'string suffix (car e)))) x) #'string<) (or (> (aref (dictionary-codev dict) n) -1) completep)))) (t ;; The prefix was not an exact match of some entry in the ;; codebook. Try to find some completions from there. (let* ((nodex (node-next n dict)) (x (remove-if-not (lambda (x) (%match (car x) prefix p)) nodex))) #+(or debug-uc) (progn (format t "nodex = ~S~%" nodex) (format t "x = ~S~%" x)) (setq prefix (subseq prefix 0 p)) (cond ((= (length x) 1) ;; Only one possible completion. Try to extend ;; the completions from there. (setq prefix (concatenate 'string prefix (caar x)) n (cdar x) x (node-next (cdar x) dict)) (values (%str prefix) (sort (mapcar #'%strx x) #'string<) (> (aref (dictionary-codev dict) n) -1))) (t ;; There's more than one possible completion. ;; Try to extend each of those completions one ;; more step. (let* ((p (mapcan #'(lambda (ex) (let ((next (node-next (cdr ex) dict))) (if next (mapcar #'(lambda (n) (concatenate 'string (car ex) (car n))) (node-next (cdr ex) dict)) (list (car ex))))) x)) (q (%mip p))) (setq prefix (concatenate 'string prefix q)) (do ((tmp p (cdr tmp))) ((endp tmp)) (setf (car tmp) (subseq (car tmp) (length q)))) (values (%str prefix) (sort (mapcar #'%str p) #'string<) nil)))))))))) ;; Like unicode-complete-name, but we also try to handle the names ;; that can be computed algorithmically like the Hangul syllables and ;; the CJK Unified Ideographs. (defun unicode-complete (prefix &optional (dict (unidata-name+ *unicode-data*))) "Search the dictionary in Dict and return a list of the possible completions starting with Prefix. If there is no match, NIL is returned." (let (names) (multiple-value-bind (prefix-match next completep) (unicode-complete-name prefix dict) (loop for x in next do (push (concatenate 'string prefix-match x) names)) (when completep (push prefix-match names)) (flet ((han-or-cjk-completion (prefix-match prefix dictionary) (let* ((prefix-tail (subseq prefix-match (min (length prefix) (length prefix-match)))) (full-prefix (concatenate 'string prefix prefix-tail))) (multiple-value-bind (m suffixes) (unicode-complete-name prefix-tail dictionary) (declare (ignore m)) (if suffixes (loop for n in suffixes do (push (concatenate 'string full-prefix n) names)) ;; No suffixes. So either the prefix is the ;; only possible completion or it's not valid. ;; Figure that out. If it's valid, add it to ;; names. (when (search-dictionary (string-upcase prefix-tail) dictionary) (push prefix-match names))))))) ;; Match prefix for Hangul syllables or CJK unified ideographs. (cond ((char= (char prefix-match 0) #\H) ;; Add "Hangul_Syllable_" as possible completion for ;; anything beginning with "H". (push "Hangul_Syllable_" names) (when (<= (length names) 1) ;; Hangul_Syllable is the only match, so let's extend it. (unless *hangul-syllable-dictionary* (initialize-reverse-hangul-tables) (build-hangul-syllable-dictionary)) (han-or-cjk-completion prefix-match "Hangul_Syllable_" *hangul-syllable-dictionary*))) ((char= (char prefix-match 0) #\C) ;; Add "Cjk_Unified_Ideograph-" as possible completion ;; for anything beginning with "C". (push "Cjk_Unified_Ideograph-" names) (when (<= (length names) 1) (unless *cjk-unified-ideograph-dictionary* (build-cjk-unified-ideograph-dictionary)) (han-or-cjk-completion prefix-match "Cjk_Unified_Ideograph-" *cjk-unified-ideograph-dictionary*) )))) (setf names (mapcar #'string-capitalize names)) ;;(format t "Final names = ~S~%" names) names))) (defun %str (x) (nsubstitute #\_ #\Space (string-capitalize x))) (defun %strx (x) (%str (car x))) (defun %match (part prefix posn) (let ((s1 (search part prefix :start2 posn)) (s2 (search prefix part :start1 posn))) (or (and s1 (= s1 posn)) (and s2 (zerop s2))))) (defun %mip (strings) (let* ((first (first strings)) (posn (length first))) (dolist (string (rest strings)) (setq posn (or (mismatch first string :end1 posn) posn))) (subseq first 0 posn))) (defun node-next (i &optional (dict (unidata-name+ *unicode-data*))) (let* ((j (aref (dictionary-nextv dict) i)) (x (ldb (byte 14 18) j)) (y (ldb (byte 18 0) j))) (loop for i from 0 below (aref (dictionary-keyl dict) x) collect (close-node (cons (aref (dictionary-cdbk dict) (aref (dictionary-keyv dict) (+ x i))) (+ y i)) dict)))) (defun close-node (i &optional (dict (unidata-name+ *unicode-data*))) (loop (if (> (aref (dictionary-codev dict) (cdr i)) -1) (return i) (let* ((j (aref (dictionary-nextv dict) (cdr i))) (x (ldb (byte 14 18) j)) (y (ldb (byte 18 0) j))) (if (> (aref (dictionary-keyl dict) x) 1) (return i) (let ((k (aref (dictionary-cdbk dict) (aref (dictionary-keyv dict) x)))) (setf (car i) (concatenate 'string (car i) k) (cdr i) y))))))) (defun build-hangul-syllable-dictionary () "Build the dictionary for Hangul syllables" ;;(format t "~&Building Hangul Syllable dictionary. Please wait...~%") ;;(force-output) (initialize-reverse-hangul-tables) (let ((hangul-codebook ;; For our codebook, combine all the choseong, jungseong, and ;; jonseong syllables, but removing empty strings (there's at ;; least one). Then sort these according to length. This ;; ensures that if A is an initial substring of B, then B ;; must come before A (or A will never be used). (See ;; tools/build-unidata.lisp, *codebook*.) (sort (map 'vector #'car (delete "" (concatenate 'vector *reverse-hangul-choseong* *reverse-hangul-jungseong* *reverse-hangul-jongseong*) :test #'string= :key #'car)) #'> :key #'length)) (names (loop for codepoint from 0 below codepoint-limit when (hangul-syllable-p codepoint) collect (cons (subseq (format nil "~A" (string-upcase (char-name (code-char codepoint)))) 16) codepoint)))) (setf *hangul-syllable-dictionary* (build-dictionary hangul-codebook names)) ;;(format t "~&Done.~%") ;;(force-output) (values))) (defun build-cjk-unified-ideograph-dictionary () "Build the dictionary for CJK Unified Ideographs" ;;(format t "~&Building CJK Unified Ideographs dictionary. Please wait...~%") (let ((codebook (coerce (loop for k from 0 to 15 collect (format nil "~X" k)) 'vector)) (names (loop for codepoint from 0 below codepoint-limit when (cjk-ideograph-p codepoint) collect (cons (format nil "~X" codepoint) codepoint)))) (setf *cjk-unified-ideograph-dictionary* (build-dictionary codebook names)) ;;(format t "~&Done.~%") (values))) ;; The definitions of BUILD-DICTIONARY, NAME-LOOKUP, and ENCODE-NAME ;; were taken from build-unidata.lisp. (defun build-dictionary (codebook entries) (let ((khash (make-hash-table :test 'equalp)) (thash (make-hash-table)) (top 0) (keyl (make-array 0 :element-type '(unsigned-byte 8))) (keyv (make-array 0 :element-type '(unsigned-byte 8))) vec1 vec2 vec3) (labels ((add-to-trie (trie name codepoint) (loop for ch across (encode-name name codebook) do (let ((sub (cdr (assoc ch (rest trie))))) (if sub (setq trie sub) (setq trie (cdar (push (cons ch (cons nil nil)) (rest trie))))))) (unless (or (null (car trie)) (= (car trie) codepoint)) (error "Codepoints #x~4,'0X and #x~4,'0X are both named ~S." (car trie) codepoint name)) (setf (car trie) codepoint)) (key (trie) (map '(simple-array (unsigned-byte 8) (*)) #'car (rest trie))) (pass1 (trie depth) (setf (rest trie) (sort (rest trie) #'< :key #'car)) (setf (gethash trie thash) (list depth (1- (incf top)) (length (rest trie)))) (setf (gethash (key trie) khash) t) (mapc (lambda (x) (pass1 (cdr x) (1+ depth))) (rest trie))) (pass2 (trie) (let* ((x (gethash (gethash trie thash) thash)) (n (car x))) (setf (aref vec1 n) (if (first trie) (first trie) -1) (aref vec2 n) (logior (ash (gethash (key trie) khash) 18) (cdr x)))) (mapc (lambda (x) (pass2 (cdr x))) (rest trie)))) (format t "~& Initializing...~%") (let ((trie (cons nil nil))) (loop for (name . code) in entries do (add-to-trie trie name code)) (format t "~& Pass 1...~%") (pass1 trie 0) (format t "~& Sorting...~%") (dolist (key (sort (loop for k being the hash-keys of khash collect k) #'> :key #'length)) (let ((pos -1)) (loop (setq pos (search key keyv :start2 (1+ pos))) (when (and pos (zerop (aref keyl pos))) (setf (aref keyl pos) (length key))) (when (and pos (= (aref keyl pos) (length key))) (setf (gethash key khash) pos) (return)) (when (null pos) (setf (gethash key khash) (length keyv)) (setf keyl (adjust-array keyl (+ (length keyv) (length key)))) (setf (aref keyl (length keyv)) (length key)) (setf keyv (concatenate '(simple-array (unsigned-byte 8) (*)) keyv key)) (return))))) (loop with off = 1 for key in (sort (loop for x being the hash-values of thash collect x) (lambda (a b) (if (= (first a) (first b)) (< (second a) (second b)) (< (first a) (first b))))) as i upfrom 0 do (setf (gethash key thash) (cons i off) off (+ off (third key)))) (setq vec1 (make-array top :element-type '(signed-byte 32)) vec2 (make-array top :element-type '(unsigned-byte 32)) vec3 (make-array top :element-type '(unsigned-byte 32))) (format t "~& Pass 2...~%") (pass2 trie) (format t "~& Finalizing~%") (dotimes (i top) (let ((xxx (aref vec2 i))) (dotimes (j (aref keyl (ash xxx -18))) (setf (aref vec3 (+ (logand xxx #x3FFFF) j)) i)))) (loop for (name . code) in entries do (let ((n (name-lookup name codebook keyv keyl vec2))) (unless n (error "Codepoint not found for ~S." name)) (setf (ldb (byte 14 18) (aref vec3 n)) (length name)))))) (make-dictionary :cdbk codebook :keyv keyv :keyl keyl :codev vec1 :nextv vec2 :namev vec3))) (defun name-lookup (name codebook keyv keyl nextv) (let* ((current 0) (posn 0)) (loop (let ((keyp (ash (aref nextv current) -18))) (dotimes (i (aref keyl keyp) (return-from name-lookup nil)) ; shouldn't happen (let* ((str (aref codebook (aref keyv (+ keyp i)))) (len (length str))) (when (and (>= (length name) (+ posn len)) (string= name str :start1 posn :end1 (+ posn len))) (setq current (+ (logand (aref nextv current) #x3FFFF) i)) (if (= (incf posn len) (length name)) (return-from name-lookup current) (return))))))))) (defun encode-name (string codebook) (let ((p 0) (res '())) (loop while (< p (length string)) do (dotimes (i (length codebook) (error "\"~C\" is not in the codebook." (char string p))) (let ((code (aref codebook i))) (when (and (<= (length code) (- (length string) p)) (string= string code :start1 p :end1 (+ p (length code)))) (push i res) (incf p (length code)) (return))))) (nreverse (coerce res 'vector)))) ) (in-package :swank-backend) (defun match-semi-standard (prefix matchp) ;; Handle the CMUCL's short character names. (loop for name in lisp::char-name-alist when (funcall matchp prefix (car name)) collect (car name))) (defimplementation character-completion-set (prefix matchp) (let ((names (lisp::unicode-complete prefix))) ;; Match prefix against semistandard names. If there's a match, ;; add it to our list of matches. (let ((semi-standard (match-semi-standard prefix matchp))) (when semi-standard (setf names (append semi-standard names)))) (setf names (mapcar #'string-capitalize names)) (loop for n in names when (funcall matchp prefix n) collect n)))