Update of /project/climacs/cvsroot/climacs In directory common-lisp.net:/tmp/cvs-serv8821
Modified Files: syntax.lisp html-syntax.lisp Log Message: Prediction is now done at the beginning of advance-parse, which means the next token is available to the predictor.
Added a :predict-test to the add-rule macro making it possible to control when prediction is reasonable.
Added :predict-test to a few rules of HTML syntax to speed up the parser.
Date: Mon May 9 15:12:47 2005 Author: rstrandh
Index: climacs/syntax.lisp diff -u climacs/syntax.lisp:1.48 climacs/syntax.lisp:1.49 --- climacs/syntax.lisp:1.48 Mon May 2 11:05:00 2005 +++ climacs/syntax.lisp Mon May 9 15:12:47 2005 @@ -205,6 +205,7 @@ ((left-hand-side :initarg :left-hand-side :reader left-hand-side) (right-hand-side :initarg :right-hand-side :reader right-hand-side) (symbols :initarg :symbols :reader symbols) + (predict-test :initarg :predict-test :reader predict-test) (number)))
(defclass grammar () @@ -212,7 +213,7 @@ (hash :initform (make-hash-table) :accessor hash) (number-of-rules :initform 0)))
-(defmacro grammar-rule ((left-hand-side arrow arglist &body body)) +(defmacro grammar-rule ((left-hand-side arrow arglist &body body) &key predict-test) (declare (ignore arrow)) (labels ((var-of (arg) (if (symbolp arg) @@ -244,7 +245,8 @@ (symbolp (car body))) `(make-instance ',left-hand-side ,@body) `(progn ,@body))) - :symbols ,(coerce (mapcar #'sym-of arglist) 'vector)))) + :symbols ,(coerce (mapcar #'sym-of arglist) 'vector) + :predict-test ,predict-test)))
(defmacro grammar (&body body) @@ -308,14 +310,15 @@ (cond ((null remaining) nil) ((functionp remaining) - (handle-incomplete-item (make-instance 'incomplete-item - :orig-state (orig-state prev-item) - :predicted-from (predicted-from prev-item) - :rule (rule prev-item) - :dot-position (1+ (dot-position prev-item)) - :parse-trees (cons parse-tree (parse-trees prev-item)) - :suffix remaining) - orig-state to-state)) + (handle-incomplete-item + (make-instance 'incomplete-item + :orig-state (orig-state prev-item) + :predicted-from (predicted-from prev-item) + :rule (rule prev-item) + :dot-position (1+ (dot-position prev-item)) + :parse-trees (cons parse-tree (parse-trees prev-item)) + :suffix remaining) + orig-state to-state)) (t (let* ((parse-trees (cons parse-tree (parse-trees prev-item))) (start (find-if-not #'null parse-trees @@ -389,30 +392,45 @@ (t (push parse-tree (gethash from-state parse-trees)) (handle-parse-tree))))))
+(defun predict (item state tokens) + (dolist (rule (gethash (aref (symbols (rule item)) (dot-position item)) + (hash (parser-grammar (parser state))))) + (if (functionp (right-hand-side rule)) + (let ((predicted-rules (slot-value state 'predicted-rules)) + (rule-number (slot-value rule 'number)) + (predict-test (predict-test rule))) + (when (zerop (sbit predicted-rules rule-number)) + (setf (sbit predicted-rules rule-number) 1) + (when (or (null predict-test) + (some predict-test tokens)) + (handle-and-predict-incomplete-item + (make-instance 'incomplete-item + :orig-state state + :predicted-from item + :rule rule + :dot-position 0 + :suffix (right-hand-side rule)) + state tokens)))) + (potentially-handle-parse-tree (right-hand-side rule) state state))) + (loop for parse-tree in (gethash state (parse-trees state)) + do (derive-and-handle-item item parse-tree state state))) + (defun handle-incomplete-item (item orig-state to-state) (declare (optimize speed)) (cond ((find item (the list (gethash orig-state (incomplete-items to-state))) :test #'item-equal) nil) (t - (push item (gethash orig-state (incomplete-items to-state))) - (dolist (rule (gethash (aref (symbols (rule item)) (dot-position item)) - (hash (parser-grammar (parser to-state))))) - (if (functionp (right-hand-side rule)) - (let ((predicted-rules (slot-value to-state 'predicted-rules)) - (rule-number (slot-value rule 'number))) - (when (zerop (sbit predicted-rules rule-number)) - (setf (sbit predicted-rules rule-number) 1) - (handle-incomplete-item (make-instance 'incomplete-item - :orig-state to-state - :predicted-from item - :rule rule - :dot-position 0 - :suffix (right-hand-side rule)) - to-state to-state))) - (potentially-handle-parse-tree (right-hand-side rule) to-state to-state))) - (loop for parse-tree in (gethash to-state (parse-trees to-state)) - do (derive-and-handle-item item parse-tree to-state to-state))))) + (push item (gethash orig-state (incomplete-items to-state)))))) + +(defun handle-and-predict-incomplete-item (item state tokens) + (declare (optimize speed)) + (cond ((find item (the list (gethash state (incomplete-items state))) + :test #'item-equal) + nil) + (t + (push item (gethash state (incomplete-items state))) + (predict item state tokens))))
(defmethod initialize-instance :after ((parser parser) &rest args) (declare (ignore args)) @@ -424,13 +442,14 @@ (or (subtypep (target parser) sym) (subtypep sym (target parser)))) (if (functionp (right-hand-side rule)) - (handle-incomplete-item (make-instance 'incomplete-item - :orig-state initial-state - :predicted-from nil - :rule rule - :dot-position 0 - :suffix (right-hand-side rule)) - initial-state initial-state) + (handle-incomplete-item + (make-instance 'incomplete-item + :orig-state initial-state + :predicted-from nil + :rule rule + :dot-position 0 + :suffix (right-hand-side rule)) + initial-state initial-state) (potentially-handle-parse-tree (right-hand-side rule) initial-state initial-state))))))
@@ -442,6 +461,11 @@ do (return parse-tree)))
(defun advance-parse (parser tokens state) + (maphash (lambda (from-state items) + (declare (ignore from-state)) + (dolist (item items) + (predict item state tokens))) + (incomplete-items state)) (let ((new-state (make-instance 'parser-state :parser parser))) (loop for token in tokens do (potentially-handle-parse-tree token state new-state))
Index: climacs/html-syntax.lisp diff -u climacs/html-syntax.lisp:1.29 climacs/html-syntax.lisp:1.30 --- climacs/html-syntax.lisp:1.29 Mon Apr 11 08:27:13 2005 +++ climacs/html-syntax.lisp Mon May 9 15:12:47 2005 @@ -22,6 +22,11 @@
(in-package :climacs-html-syntax)
+(define-syntax html-syntax ("HTML" (basic-syntax)) + ((lexer :reader lexer) + (valid-parse :initform 1) + (parser))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; grammar classes @@ -40,6 +45,31 @@
(defclass html-tag (html-token) ())
+(defclass html-start-tag (html-tag) + ((start :initarg :start) + (name :initarg :name) + (attributes :initform nil :initarg :attributes) + (end :initarg :end))) + +(defmethod display-parse-tree ((entity html-start-tag) (syntax html-syntax) pane) + (with-slots (start name attributes end) entity + (display-parse-tree start syntax pane) + (display-parse-tree name syntax pane) + (unless (null attributes) + (display-parse-tree attributes syntax pane)) + (display-parse-tree end syntax pane))) + +(defclass html-end-tag (html-tag) + ((start :initarg :start) + (name :initarg :name) + (end :initarg :end))) + +(defmethod display-parse-tree ((entity html-end-tag) (syntax html-syntax) pane) + (with-slots (start name attributes end) entity + (display-parse-tree start syntax pane) + (display-parse-tree name syntax pane) + (display-parse-tree end syntax pane))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; lexer @@ -74,19 +104,15 @@ (t (fo) (make-instance 'delimiter))))))))
-(define-syntax html-syntax ("HTML" (basic-syntax)) - ((lexer :reader lexer) - (valid-parse :initform 1) - (parser))) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; parser
(defparameter *html-grammar* (grammar))
-(defmacro add-html-rule (rule) - `(add-rule (grammar-rule ,rule) *html-grammar*)) +(defmacro add-html-rule (rule &key predict-test) + `(add-rule (grammar-rule ,rule :predict-test ,predict-test) + *html-grammar*))
(defun word-is (word string) (string-equal (coerce (buffer-sequence (buffer word) (start-offset word) (end-offset word)) 'string) @@ -94,23 +120,27 @@
(defmacro define-start-tag (name string) `(progn - (defclass ,name (html-tag) ()) + (defclass ,name (html-start-tag) ())
(add-html-rule (,name -> (start-tag-start (word (and (= (end-offset start-tag-start) (start-offset word)) (word-is word ,string))) - (tag-end (= (end-offset word) (start-offset tag-end)))))))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start start-tag-start :name word :end tag-end))))
(defmacro define-end-tag (name string) `(progn - (defclass ,name (html-tag) ()) + (defclass ,name (html-end-tag) ())
(add-html-rule (,name -> (end-tag-start (word (and (= (end-offset end-tag-start) (start-offset word)) (word-is word ,string))) - (tag-end (= (end-offset word) (start-offset tag-end)))))))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start end-tag-start :name word :end tag-end) + :predict-test (lambda (token) + (typep token 'end-tag-start)))))
(defmacro define-tag-pair (start-name end-name string) `(progn (define-start-tag ,start-name ,string) @@ -310,7 +340,9 @@ (defclass $inline (html-nonterminal) ((contents :initarg :contents)))
-(add-html-rule ($inline -> (inline-element) :contents inline-element)) +(add-html-rule ($inline -> (inline-element) :contents inline-element) + :predict-test (lambda (token) + (typep token 'start-tag-start))) (add-html-rule ($inline -> (word) :contents word)) (add-html-rule ($inline -> (delimiter) :contents delimiter))
@@ -326,7 +358,9 @@ ((contents :initarg :contents)))
(add-html-rule ($flow -> ($inline) :contents $inline)) -(add-html-rule ($flow -> (block-level-element) :contents block-level-element)) +(add-html-rule ($flow -> (block-level-element) :contents block-level-element) + :predict-test (lambda (token) + (typep token 'start-tag-start)))
(defmethod display-parse-tree ((entity $flow) (syntax html-syntax) pane) (with-slots (contents) entity @@ -379,11 +413,7 @@
(define-list <a>-attributes <a>-attribute)
-(defclass <a> (html-tag) - ((start :initarg :start) - (name :initarg :name) - (attributes :initarg :attributes) - (end :initarg :end))) +(defclass <a> (html-start-tag) ())
(add-html-rule (<a> -> (start-tag-start (word (and (= (end-offset start-tag-start) (start-offset word)) @@ -392,13 +422,6 @@ tag-end) :start start-tag-start :name word :attributes <a>-attributes :end tag-end))
-(defmethod display-parse-tree ((entity <a>) (syntax html-syntax) pane) - (with-slots (start name attributes end) entity - (display-parse-tree start syntax pane) - (display-parse-tree name syntax pane) - (display-parse-tree attributes syntax pane) - (display-parse-tree end syntax pane))) - (define-end-tag </a> "a")
(defclass a-element (inline-element) @@ -431,11 +454,7 @@
;;;;;;;;;;;;;;; p element
-(defclass <p> (html-tag) - ((start :initarg :start) - (name :initarg :name) - (attributes :initarg :attributes) - (end :initarg :end))) +(defclass <p> (html-start-tag) ())
(add-html-rule (<p> -> (start-tag-start (word (and (= (end-offset start-tag-start) (start-offset word)) @@ -444,13 +463,6 @@ tag-end) :start start-tag-start :name word :attributes common-attributes :end tag-end))
-(defmethod display-parse-tree ((entity <p>) (syntax html-syntax) pane) - (with-slots (start name attributes end) entity - (display-parse-tree start syntax pane) - (display-parse-tree name syntax pane) - (display-parse-tree attributes syntax pane) - (display-parse-tree end syntax pane))) - (define-end-tag </p> "p")
(defclass p-element (block-level-element) @@ -469,11 +481,7 @@
;;;;;;;;;;;;;;; li element
-(defclass <li> (html-tag) - ((start :initarg :start) - (name :initarg :name) - (attributes :initarg :attributes) - (end :initarg :end))) +(defclass <li> (html-start-tag) ())
(add-html-rule (<li> -> (start-tag-start (word (and (= (end-offset start-tag-start) (start-offset word)) @@ -485,13 +493,6 @@ :attributes common-attributes :end tag-end))
-(defmethod display-parse-tree ((entity <li>) (syntax html-syntax) pane) - (with-slots (start name attributes end) entity - (display-parse-tree start syntax pane) - (display-parse-tree name syntax pane) - (display-parse-tree attributes syntax pane) - (display-parse-tree end syntax pane))) - (define-end-tag </li> "li")
(defclass li-element (html-nonterminal) @@ -513,11 +514,7 @@
;;;;;;;;;;;;;;; ul element
-(defclass <ul> (html-tag) - ((start :initarg :start) - (name :initarg :name) - (attributes :initarg :attributes) - (end :initarg :end))) +(defclass <ul> (html-start-tag) ())
(add-html-rule (<ul> -> (start-tag-start (word (and (= (end-offset start-tag-start) (start-offset word)) @@ -529,13 +526,6 @@ :attributes common-attributes :end tag-end))
-(defmethod display-parse-tree ((entity <ul>) (syntax html-syntax) pane) - (with-slots (start name attributes end) entity - (display-parse-tree start syntax pane) - (display-parse-tree name syntax pane) - (display-parse-tree attributes syntax pane) - (display-parse-tree end syntax pane))) - (define-end-tag </ul> "ul")
(define-nonempty-list li-elements li-element) @@ -624,11 +614,7 @@
(define-list <html>-attributes <html>-attribute)
-(defclass <html> (html-tag) - ((start :initarg :start) - (name :initarg :name) - (attributes :initarg :attributes) - (end :initarg :end))) +(defclass <html> (html-start-tag) ())
(add-html-rule (<html> -> (start-tag-start (word (and (= (end-offset start-tag-start) (start-offset word)) @@ -637,13 +623,6 @@ tag-end) :start start-tag-start :name word :attributes <html>-attributes :end tag-end))
-(defmethod display-parse-tree ((entity <html>) (syntax html-syntax) pane) - (with-slots (start name attributes end) entity - (display-parse-tree start syntax pane) - (display-parse-tree name syntax pane) - (display-parse-tree attributes syntax pane) - (display-parse-tree end syntax pane))) - (define-end-tag </html> "html")
(defclass html (html-nonterminal) @@ -736,13 +715,14 @@ (when (and (end-offset entity) (mark> (end-offset entity) top)) (call-next-method))))
-(defmethod display-parse-tree ((entity html-token) (syntax html-syntax) pane) +(defmethod display-parse-tree ((entity html-lexeme) (syntax html-syntax) pane) (flet ((cache-test (t1 t2) - (and (eq t1 t2) - (eq (slot-value t1 'ink) - (medium-ink (sheet-medium pane))) - (eq (slot-value t1 'face) - (text-style-face (medium-text-style (sheet-medium pane))))))) + (let ((result (and (eq t1 t2) + (eq (slot-value t1 'ink) + (medium-ink (sheet-medium pane))) + (eq (slot-value t1 'face) + (text-style-face (medium-text-style (sheet-medium pane))))))) + result))) (updating-output (pane :unique-id entity :id-test #'eq :cache-value entity @@ -761,7 +741,7 @@ (with-drawing-options (pane :ink +green4+) (call-next-method)))
-(defmethod display-parse-tree :before ((entity html-token) (syntax html-syntax) pane) +(defmethod display-parse-tree :before ((entity html-lexeme) (syntax html-syntax) pane) (handle-whitespace pane (buffer pane) *white-space-start* (start-offset entity)) (setf *white-space-start* (end-offset entity)))