Update of /project/climacs/cvsroot/climacs In directory common-lisp.net:/tmp/cvs-serv5255
Modified Files: base-test.lisp base.lisp buffer-test.lisp climacs.asd packages.lisp Log Message: Added cl-automaton module and support for regexp searches. Below are some notes. Also modified one constituentp-related test.
Instead of having module "cl-automaton" within the :climacs defsystem, the module could be turned into a dependence on :automaton, defined in cl-automaton/automaton.asd. Similarly for cl-automaton/automaton-test.asd.
For slower buffer implementations, a buffer iterator is needed for higher performance of regexp searches. Greedy matching should be improved (see automaton::run-to-first-unmatch).
Perhaps, fast (tabular) automaton representation should be implemented, unless it would be taking way too much space.
Incremental regexp search needs to be done.
Date: Fri Aug 5 00:07:45 2005 Author: abakic
Index: climacs/base-test.lisp diff -u climacs/base-test.lisp:1.14 climacs/base-test.lisp:1.15 --- climacs/base-test.lisp:1.14 Sun Jul 17 19:20:27 2005 +++ climacs/base-test.lisp Fri Aug 5 00:07:44 2005 @@ -457,7 +457,7 @@ (constituentp #\Tab) (constituentp "a") (constituentp #\Null)) - t nil nil nil nil nil) + t nil nil nil nil #-sbcl nil #+sbcl t)
(defmultitest whitespacep.test-1 (values @@ -779,7 +779,7 @@ (defmultitest tabify-buffer-region.test-1 (let ((buffer (make-instance %%buffer))) (insert-buffer-sequence buffer 0 "c l im acs") - (climacs-base::tabify-buffer-region buffer 0 (size buffer) 8) + (climacs-base::tabify-buffer-region buffer 0 (size buffer) 8) (buffer-sequence buffer 0 (size buffer))) "c l im acs")
@@ -1103,6 +1103,36 @@ (buffer-search-backward buffer 1 ""))) 3 3 0 8 nil nil 0 1)
+(defmultitest buffer-re-search-forward.test-1 + (let ((buffer (make-instance %%buffer)) + (a1 (automaton::determinize + (regexp-automaton (string-regexp "i[mac]+s")))) + (a2 (automaton::determinize + (regexp-automaton (string-regexp "[^aeiou][aeiou]"))))) + (insert-buffer-sequence buffer 0 " +climacs") + (values + (buffer-re-search-forward a1 buffer 0) + (buffer-re-search-forward a2 buffer 1) + (buffer-re-search-forward a1 buffer 4) + (buffer-re-search-forward a2 buffer 6))) + 3 2 nil nil) + +(defmultitest buffer-re-search-backward.test-1 + (let ((buffer (make-instance %%buffer)) + (a1 (climacs-base::reversed-deterministic-automaton + (regexp-automaton (string-regexp "i[ma]+c")))) + (a2 (climacs-base::reversed-deterministic-automaton + (regexp-automaton (string-regexp "[^aeiou][aeiou]"))))) + (insert-buffer-sequence buffer 0 " +climacs") + (values + (buffer-re-search-backward a1 buffer 7) + (buffer-re-search-backward a2 buffer 7) + (buffer-re-search-backward a1 buffer 5) + (buffer-re-search-backward a2 buffer 2))) + 3 4 nil nil) + (defmultitest search-forward.test-1 (let ((buffer (make-instance %%buffer))) (insert-buffer-sequence buffer 0 " @@ -1156,6 +1186,62 @@ (let ((m (clone-mark (low-mark buffer) :right))) (setf (offset m) 3) (search-backward m "klimaks") + (offset m))) + 3) + +(defmultitest re-search-forward.test-1 + (let ((buffer (make-instance %%buffer))) + (insert-buffer-sequence buffer 0 " +climacs") + (let ((m (clone-mark (low-mark buffer) :left))) + (setf (offset m) 0) + (re-search-forward m "[mac]{3}") + (offset m))) + 7) + +(defmultitest re-search-forward.test-2 + (let ((buffer (make-instance %%buffer))) + (insert-buffer-sequence buffer 0 "climacs") + (let ((m (clone-mark (low-mark buffer) :right))) + (setf (offset m) 3) + (re-search-forward m "[mac]{3}") + (offset m))) + 6) + +(defmultitest re-search-forward.test-3 + (let ((buffer (make-instance %%buffer))) + (insert-buffer-sequence buffer 0 "climacs") + (let ((m (clone-mark (low-mark buffer) :right))) + (setf (offset m) 3) + (re-search-forward m "klimaks") + (offset m))) + 3) + +(defmultitest re-search-backward.test-1 + (let ((buffer (make-instance %%buffer))) + (insert-buffer-sequence buffer 0 "climacs +") + (let ((m (clone-mark (low-mark buffer) :left))) + (setf (offset m) 8) + (re-search-backward m "[mac]{3}") + (offset m))) + 3) + +(defmultitest re-search-backward.test-2 + (let ((buffer (make-instance %%buffer))) + (insert-buffer-sequence buffer 0 "climacs") + (let ((m (clone-mark (low-mark buffer) :right))) + (setf (offset m) 6) + (re-search-backward m "[mac]{3}") + (offset m))) + 3) + +(defmultitest re-search-backward.test-3 + (let ((buffer (make-instance %%buffer))) + (insert-buffer-sequence buffer 0 "climacs") + (let ((m (clone-mark (low-mark buffer) :right))) + (setf (offset m) 3) + (re-search-backward m "klimaks") (offset m))) 3)
Index: climacs/base.lisp diff -u climacs/base.lisp:1.39 climacs/base.lisp:1.40 --- climacs/base.lisp:1.39 Mon May 30 11:09:48 2005 +++ climacs/base.lisp Fri Aug 5 00:07:44 2005 @@ -608,6 +608,62 @@ return i finally (return nil)))
+(defun non-greedy-match-forward (a buffer i) + (let ((p (automaton::initial a))) + (loop for j from i below (size buffer) + for q = (automaton::sstep p (buffer-object buffer j)) do + (unless q + (return nil)) + (if (automaton::accept q) + (return (1+ j)) + (setq p q)) + finally (return nil)))) + +(defun buffer-re-search-forward (a buffer offset) + "Returns as the first value the smallest offset of BUFFER >= OFFSET +with contents accepted by deterministic automaton A; otherwise, +returns nil. If the first value is non-nil, the second value is the +offset after the matched contents." + (if (automaton::singleton a) + (buffer-search-forward buffer offset (automaton::singleton a)) + (loop for i from offset below (size buffer) do + (let ((j (non-greedy-match-forward a buffer i))) + (when j (return (values i j)))) + finally (return nil)))) + +(defun reversed-deterministic-automaton (a) + "Reverses and determinizes A, then returns it." + (if (automaton::singleton a) + (progn + (setf (automaton::singleton a) (reverse (automaton::singleton a))) + a) + (automaton::determinize2 + a + (make-instance 'automaton::state-set :ht (automaton::areverse a))))) + +(defun non-greedy-match-backward (a buffer i) + (let ((p (automaton::initial a))) + (loop for j downfrom i to 0 + for q = (automaton::sstep p (buffer-object buffer j)) do + (unless q + (return nil)) + (if (automaton::accept q) + (return j) + (setq p q)) + finally (return nil)))) + +(defun buffer-re-search-backward (a buffer offset) + "Returns as the first value the largest offset of BUFFER <= OFFSET +with contents accepted by (reversed) deterministic automaton A; +otherwise, returns nil. If the first value is non-nil, the second +value is the offset after the matched contents." + (if (automaton::singleton a) + (buffer-search-backward buffer offset (automaton::singleton a)) + (loop for i downfrom (min offset (1- (size buffer))) to 0 do + (let ((j (non-greedy-match-backward a buffer i))) + (when j (return (values j i)))) + finally (return nil)))) + (defun search-forward (mark vector &key (test #'eql)) "move MARK forward after the first occurence of VECTOR after MARK" (let ((offset (buffer-search-forward @@ -621,6 +677,29 @@ (buffer mark) (offset mark) vector :test test))) (when offset (setf (offset mark) offset)))) + +(defun re-search-forward (mark re) + "move MARK forward after the first occurence of string matching RE +after MARK" + (let ((a (automaton::determinize + (automaton::regexp-automaton + (automaton::string-regexp re))))) + (multiple-value-bind (i j) + (buffer-re-search-forward a (buffer mark) (offset mark)) + (when i + (setf (offset mark) j))))) + +(defun re-search-backward (mark re) + "move MARK backward before the first occurence of string matching RE +before MARK" + (let ((a (reversed-deterministic-automaton + (automaton::regexp-automaton + (automaton::string-regexp re))))) + (multiple-value-bind (i j) + (buffer-re-search-backward a (buffer mark) (offset mark)) + (declare (ignorable j)) + (when i + (setf (offset mark) i)))))
(defun buffer-search-word-backward (buffer offset word &key (test #'eql)) "return the largest offset of BUFFER <= (- OFFSET (length WORD))
Index: climacs/buffer-test.lisp diff -u climacs/buffer-test.lisp:1.20 climacs/buffer-test.lisp:1.21 --- climacs/buffer-test.lisp:1.20 Tue Mar 15 19:41:18 2005 +++ climacs/buffer-test.lisp Fri Aug 5 00:07:44 2005 @@ -4,7 +4,7 @@ ;;;
(cl:defpackage :climacs-tests - (:use :cl :rtest :climacs-buffer :climacs-base)) + (:use :cl :rtest :climacs-buffer :climacs-base :automaton))
(cl:in-package :climacs-tests)
Index: climacs/climacs.asd diff -u climacs/climacs.asd:1.35 climacs/climacs.asd:1.36 --- climacs/climacs.asd:1.35 Sun Jul 24 18:44:48 2005 +++ climacs/climacs.asd Fri Aug 5 00:07:45 2005 @@ -30,13 +30,19 @@ (defsystem :climacs :depends-on (:mcclim :flexichain) :components - ((:module "Persistent" + ((:module "cl-automaton" + :components ((:file "automaton-package") + (:file "eqv-hash" :depends-on ("automaton-package")) + (:file "state-and-transition" :depends-on ("eqv-hash")) + (:file "automaton" :depends-on ("state-and-transition" "eqv-hash")) + (:file "regexp" :depends-on ("automaton")))) + (:module "Persistent" :components ((:file "binseq-package") (:file "binseq" :depends-on ("binseq-package")) (:file "obinseq" :depends-on ("binseq-package" "binseq")) (:file "binseq2" :depends-on ("binseq-package" "obinseq" "binseq"))))
- (:file "packages" :depends-on ("Persistent")) + (:file "packages" :depends-on ("cl-automaton" "Persistent")) (:file "buffer" :depends-on ("packages")) (:file "persistent-buffer" :pathname #p"Persistent/persistent-buffer.lisp" @@ -74,7 +80,22 @@ :components ((:file "rt" :pathname #p"testing/rt.lisp") (:file "buffer-test" :depends-on ("rt")) - (:file "base-test" :depends-on ("rt")))) + (:file "base-test" :depends-on ("rt")) + (:file "automaton-test-package" + :pathname #P"cl-automaton/automaton-test-package.lisp" + :depends-on ("rt")) + (:file "eqv-hash-test" + :pathname #P"cl-automaton/eqv-hash-test.lisp" + :depends-on ("rt" "automaton-test-package")) + (:file "state-and-transition-test" + :pathname #P"cl-automaton/state-and-transition-test.lisp" + :depends-on ("rt" "automaton-test-package")) + (:file "automaton-test" + :pathname #P"cl-automaton/automaton-test.lisp" + :depends-on ("rt" "automaton-test-package")) + (:file "regexp-test" + :pathname #P"cl-automaton/regexp-test.lisp" + :depends-on ("rt" "automaton-test-package"))))
#+asdf (defmethod asdf:perform :around ((o asdf:compile-op)
Index: climacs/packages.lisp diff -u climacs/packages.lisp:1.72 climacs/packages.lisp:1.73 --- climacs/packages.lisp:1.72 Thu Aug 4 03:10:45 2005 +++ climacs/packages.lisp Fri Aug 5 00:07:45 2005 @@ -79,7 +79,9 @@ #:name-mixin #:name #:buffer-looking-at #:looking-at #:buffer-search-forward #:buffer-search-backward - #:search-forward #:search-backward)) + #:buffer-re-search-forward #:buffer-re-search-backward + #:search-forward #:search-backward + #:re-search-forward #:re-search-backward))
(defpackage :climacs-abbrev (:use :clim-lisp :clim :climacs-buffer :climacs-base)