Update of /project/climacs/cvsroot/climacs In directory common-lisp.net:/tmp/cvs-serv6981
Modified Files: html-syntax.lisp packages.lisp syntax.lisp Log Message: The incremental lexer is now in the climacs-syntax package in the syntax.lisp file.
Date: Tue Mar 15 06:39:25 2005 Author: rstrandh
Index: climacs/html-syntax.lisp diff -u climacs/html-syntax.lisp:1.13 climacs/html-syntax.lisp:1.14 --- climacs/html-syntax.lisp:1.13 Tue Mar 15 05:31:59 2005 +++ climacs/html-syntax.lisp Tue Mar 15 06:39:24 2005 @@ -24,92 +24,10 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; -;;; this should really go in syntax.lisp - -(defclass parse-tree () - ((start-mark :initarg :start-mark :reader start-mark) - (size :initarg :size))) - -(defgeneric start-offset (parse-tree)) - -(defmethod start-offset ((tree parse-tree)) - (offset (start-mark tree))) - -(defgeneric end-offset (parse-tree)) - -(defmethod end-offset ((tree parse-tree)) - (with-slots (start-mark size) tree - (+ (offset start-mark) size))) - -(defclass lexer () - ((buffer :initarg :buffer :reader buffer))) - -(defgeneric nb-lexemes (lexer)) -(defgeneric lexeme (lexer pos)) -(defgeneric insert-lexeme (lexer pos lexeme)) -(defgeneric delete-invalid-lexemes (lexer from to)) -(defgeneric inter-lexeme-object-p (lexer object)) -(defgeneric skip-inter-lexeme-objects (lexer scan)) -(defgeneric update-lex (lexer start-pos end)) - -(defclass incremental-lexer (lexer) - ((lexemes :initform (make-instance 'standard-flexichain) :reader lexemes))) - -(defmethod nb-lexemes ((lexer incremental-lexer)) - (nb-elements (lexemes lexer))) - -(defmethod lexeme ((lexer incremental-lexer) pos) - (element* (lexemes lexer) pos)) - -(defmethod insert-lexeme ((lexer incremental-lexer) pos lexeme) - (insert* (lexemes lexer) pos lexeme)) - -(defmethod delete-invalid-lexemes ((lexer incremental-lexer) from to) - "delete all lexemes between FROM and TO and return the first invalid -position in the lexemes of LEXER" - (with-slots (lexemes) lexer - (let ((start 1) - (end (nb-elements lexemes))) - ;; use binary search to find the first lexeme to delete - (loop while (< start end) - do (let ((middle (floor (+ start end) 2))) - (if (mark< (end-offset (element* lexemes middle)) from) - (setf start (1+ middle)) - (setf end middle)))) - ;; delete lexemes - (loop until (or (= start (nb-elements lexemes)) - (mark> (start-mark (element* lexemes start)) to)) - do (delete* lexemes start)) - start))) - -(defmethod skip-inter-lexeme-objects ((lexer incremental-lexer) scan) - (loop until (end-of-buffer-p scan) - while (inter-lexeme-object-p lexer (object-after scan)) - do (forward-object scan))) - -(defmethod update-lex ((lexer incremental-lexer) start-pos end) - (let ((scan (clone-mark (low-mark (buffer lexer)) :left))) - (setf (offset scan) - (end-offset (lexeme lexer (1- start-pos)))) - (loop do (skip-inter-lexeme-objects lexer scan) - until (if (end-of-buffer-p end) - (end-of-buffer-p scan) - (mark> scan end)) - do (let* ((start-mark (clone-mark scan)) - (lexeme (next-lexeme scan)) - (size (- (offset scan) (offset start-mark)))) - (setf (slot-value lexeme 'start-mark) start-mark - (slot-value lexeme 'size) size) - (insert-lexeme lexer start-pos lexeme)) - (incf start-pos)))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; ;;; grammar classes
(defclass html-sym (parse-tree) - ((badness :initform 0 :initarg :badness :reader badness) - (message :initform "" :initarg :message :reader message))) + ((badness :initform 0 :initarg :badness :reader badness)))
(defmethod parse-tree-better ((t1 html-sym) (t2 html-sym)) (and (eq (class-of t1) (class-of t2)) @@ -194,7 +112,7 @@ (defclass word (html-element) ()) (defclass delimiter (html-element) ())
-(defun next-lexeme (scan) +(defmethod next-lexeme ((lexer html-lexer) scan) (flet ((fo () (forward-object scan))) (let ((object (object-after scan))) (case object
Index: climacs/packages.lisp diff -u climacs/packages.lisp:1.56 climacs/packages.lisp:1.57 --- climacs/packages.lisp:1.56 Sun Mar 13 21:51:48 2005 +++ climacs/packages.lisp Tue Mar 15 06:39:24 2005 @@ -92,8 +92,15 @@ (:export #:syntax #:define-syntax #:basic-syntax #:update-syntax #:update-syntax-for-display - #:grammar #:parser #:initial-state + #:grammar #:grammar-rule #:add-rule + #:parser #:initial-state #:advance-parse + #:parse-tree #:start-offset #:end-offset + #:start-mark ; FIXME remove this + #:lexer #:nb-lexemes #:lexeme #:insert-lexeme + #:incremental-lexer #:next-lexeme + #:delete-invalid-lexemes #:inter-lexeme-object-p + #:skip-inter-lexeme-objects #:update-lex #:parse-stack-top #:target-parse-tree #:parse-state-empty-p #:parse-stack-next #:parse-stack-symbol #:parse-stack-parse-trees #:map-over-parse-trees
Index: climacs/syntax.lisp diff -u climacs/syntax.lisp:1.37 climacs/syntax.lisp:1.38 --- climacs/syntax.lisp:1.37 Tue Mar 15 05:31:59 2005 +++ climacs/syntax.lisp Tue Mar 15 06:39:24 2005 @@ -82,6 +82,92 @@ ;;; ;;; Incremental Earley parser
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; parse tree + +(defclass parse-tree () + ((start-mark :initarg :start-mark :reader start-mark) + (size :initarg :size))) + +(defgeneric start-offset (parse-tree)) + +(defmethod start-offset ((tree parse-tree)) + (offset (start-mark tree))) + +(defgeneric end-offset (parse-tree)) + +(defmethod end-offset ((tree parse-tree)) + (with-slots (start-mark size) tree + (+ (offset start-mark) size))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; lexer + +(defclass lexer () + ((buffer :initarg :buffer :reader buffer))) + +(defgeneric nb-lexemes (lexer)) +(defgeneric lexeme (lexer pos)) +(defgeneric insert-lexeme (lexer pos lexeme)) +(defgeneric delete-invalid-lexemes (lexer from to)) +(defgeneric inter-lexeme-object-p (lexer object)) +(defgeneric skip-inter-lexeme-objects (lexer scan)) +(defgeneric update-lex (lexer start-pos end)) +(defgeneric next-lexeme (lexer scan)) + +(defclass incremental-lexer (lexer) + ((lexemes :initform (make-instance 'standard-flexichain) :reader lexemes))) + +(defmethod nb-lexemes ((lexer incremental-lexer)) + (nb-elements (lexemes lexer))) + +(defmethod lexeme ((lexer incremental-lexer) pos) + (element* (lexemes lexer) pos)) + +(defmethod insert-lexeme ((lexer incremental-lexer) pos lexeme) + (insert* (lexemes lexer) pos lexeme)) + +(defmethod delete-invalid-lexemes ((lexer incremental-lexer) from to) + "delete all lexemes between FROM and TO and return the first invalid +position in the lexemes of LEXER" + (with-slots (lexemes) lexer + (let ((start 1) + (end (nb-elements lexemes))) + ;; use binary search to find the first lexeme to delete + (loop while (< start end) + do (let ((middle (floor (+ start end) 2))) + (if (mark< (end-offset (element* lexemes middle)) from) + (setf start (1+ middle)) + (setf end middle)))) + ;; delete lexemes + (loop until (or (= start (nb-elements lexemes)) + (mark> (start-mark (element* lexemes start)) to)) + do (delete* lexemes start)) + start))) + +(defmethod skip-inter-lexeme-objects ((lexer incremental-lexer) scan) + (loop until (end-of-buffer-p scan) + while (inter-lexeme-object-p lexer (object-after scan)) + do (forward-object scan))) + +(defmethod update-lex ((lexer incremental-lexer) start-pos end) + (let ((scan (clone-mark (low-mark (buffer lexer)) :left))) + (setf (offset scan) + (end-offset (lexeme lexer (1- start-pos)))) + (loop do (skip-inter-lexeme-objects lexer scan) + until (if (end-of-buffer-p end) + (end-of-buffer-p scan) + (mark> scan end)) + do (let* ((start-mark (clone-mark scan)) + (lexeme (next-lexeme lexer scan)) + (size (- (offset scan) (offset start-mark)))) + (setf (slot-value lexeme 'start-mark) start-mark + (slot-value lexeme 'size) size) + (insert-lexeme lexer start-pos lexeme)) + (incf start-pos)))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; grammar @@ -92,9 +178,10 @@ (symbols :initarg :symbols :reader symbols)))
(defclass grammar () - ((rules :initarg :rules :reader rules))) + ((rules :initarg :rules :accessor rules)))
-(defmacro grammar (&body body) +(defmacro grammar-rule ((left-hand-side arrow arglist &body body)) + (declare (ignore arrow)) (labels ((var-of (arg) (if (symbolp arg) arg @@ -110,25 +197,33 @@ ((symbolp (cadr arg)) t) (t (cadr arg)))) (build-rule (arglist body) - (if (null arglist) - body - (let ((arg (car arglist))) - `(lambda (,(var-of arg)) - (when (and (typep ,(var-of arg) ',(sym-of arg)) - ,(test-of arg)) - ,(build-rule (cdr arglist) body)))))) - (make-rule (rule) - `(make-instance 'rule - :left-hand-side ',(car rule) - :right-hand-side - ,(build-rule (caddr rule) - (if (or (= (length rule) 3) - (symbolp (cadddr rule))) - `(make-instance ',(car rule) ,@(cdddr rule)) - `(progn ,@(cdddr rule)))) - :symbols ,(coerce (mapcar #'sym-of (caddr rule)) 'vector)))) - `(make-instance 'grammar - :rules (list ,@(mapcar #'make-rule body))))) + (if (null arglist) + body + (let ((arg (car arglist))) + `(lambda (,(var-of arg)) + (when (and (typep ,(var-of arg) ',(sym-of arg)) + ,(test-of arg)) + ,(build-rule (cdr arglist) body))))))) + `(make-instance 'rule + :left-hand-side ',left-hand-side + :right-hand-side + ,(build-rule arglist + (if (or (null body) + (symbolp (car body))) + `(make-instance ',left-hand-side ,@body) + `(progn ,@body))) + :symbols ,(coerce (mapcar #'sym-of arglist) 'vector)))) + + +(defmacro grammar (&body body) + `(make-instance 'grammar + :rules (list ,@(loop for rule in body + collect `(grammar-rule ,rule))))) + +(defgeneric add-rule (rule grammar)) + +(defmethod add-rule (rule (grammar grammar)) + (push rule (rules grammar)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;