Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv8821
Modified Files:
syntax.lisp html-syntax.lisp
Log Message:
Prediction is now done at the beginning of advance-parse, which
means the next token is available to the predictor.
Added a :predict-test to the add-rule macro making it possible to
control when prediction is reasonable.
Added :predict-test to a few rules of HTML syntax to speed up the parser.
Date: Mon May 9 15:12:47 2005
Author: rstrandh
Index: climacs/syntax.lisp
diff -u climacs/syntax.lisp:1.48 climacs/syntax.lisp:1.49
--- climacs/syntax.lisp:1.48 Mon May 2 11:05:00 2005
+++ climacs/syntax.lisp Mon May 9 15:12:47 2005
@@ -205,6 +205,7 @@
((left-hand-side :initarg :left-hand-side :reader left-hand-side)
(right-hand-side :initarg :right-hand-side :reader right-hand-side)
(symbols :initarg :symbols :reader symbols)
+ (predict-test :initarg :predict-test :reader predict-test)
(number)))
(defclass grammar ()
@@ -212,7 +213,7 @@
(hash :initform (make-hash-table) :accessor hash)
(number-of-rules :initform 0)))
-(defmacro grammar-rule ((left-hand-side arrow arglist &body body))
+(defmacro grammar-rule ((left-hand-side arrow arglist &body body) &key predict-test)
(declare (ignore arrow))
(labels ((var-of (arg)
(if (symbolp arg)
@@ -244,7 +245,8 @@
(symbolp (car body)))
`(make-instance ',left-hand-side ,@body)
`(progn ,@body)))
- :symbols ,(coerce (mapcar #'sym-of arglist) 'vector))))
+ :symbols ,(coerce (mapcar #'sym-of arglist) 'vector)
+ :predict-test ,predict-test)))
(defmacro grammar (&body body)
@@ -308,14 +310,15 @@
(cond ((null remaining)
nil)
((functionp remaining)
- (handle-incomplete-item (make-instance 'incomplete-item
- :orig-state (orig-state prev-item)
- :predicted-from (predicted-from prev-item)
- :rule (rule prev-item)
- :dot-position (1+ (dot-position prev-item))
- :parse-trees (cons parse-tree (parse-trees prev-item))
- :suffix remaining)
- orig-state to-state))
+ (handle-incomplete-item
+ (make-instance 'incomplete-item
+ :orig-state (orig-state prev-item)
+ :predicted-from (predicted-from prev-item)
+ :rule (rule prev-item)
+ :dot-position (1+ (dot-position prev-item))
+ :parse-trees (cons parse-tree (parse-trees prev-item))
+ :suffix remaining)
+ orig-state to-state))
(t
(let* ((parse-trees (cons parse-tree (parse-trees prev-item)))
(start (find-if-not #'null parse-trees
@@ -389,30 +392,45 @@
(t (push parse-tree (gethash from-state parse-trees))
(handle-parse-tree))))))
+(defun predict (item state tokens)
+ (dolist (rule (gethash (aref (symbols (rule item)) (dot-position item))
+ (hash (parser-grammar (parser state)))))
+ (if (functionp (right-hand-side rule))
+ (let ((predicted-rules (slot-value state 'predicted-rules))
+ (rule-number (slot-value rule 'number))
+ (predict-test (predict-test rule)))
+ (when (zerop (sbit predicted-rules rule-number))
+ (setf (sbit predicted-rules rule-number) 1)
+ (when (or (null predict-test)
+ (some predict-test tokens))
+ (handle-and-predict-incomplete-item
+ (make-instance 'incomplete-item
+ :orig-state state
+ :predicted-from item
+ :rule rule
+ :dot-position 0
+ :suffix (right-hand-side rule))
+ state tokens))))
+ (potentially-handle-parse-tree (right-hand-side rule) state state)))
+ (loop for parse-tree in (gethash state (parse-trees state))
+ do (derive-and-handle-item item parse-tree state state)))
+
(defun handle-incomplete-item (item orig-state to-state)
(declare (optimize speed))
(cond ((find item (the list (gethash orig-state (incomplete-items to-state)))
:test #'item-equal)
nil)
(t
- (push item (gethash orig-state (incomplete-items to-state)))
- (dolist (rule (gethash (aref (symbols (rule item)) (dot-position item))
- (hash (parser-grammar (parser to-state)))))
- (if (functionp (right-hand-side rule))
- (let ((predicted-rules (slot-value to-state 'predicted-rules))
- (rule-number (slot-value rule 'number)))
- (when (zerop (sbit predicted-rules rule-number))
- (setf (sbit predicted-rules rule-number) 1)
- (handle-incomplete-item (make-instance 'incomplete-item
- :orig-state to-state
- :predicted-from item
- :rule rule
- :dot-position 0
- :suffix (right-hand-side rule))
- to-state to-state)))
- (potentially-handle-parse-tree (right-hand-side rule) to-state to-state)))
- (loop for parse-tree in (gethash to-state (parse-trees to-state))
- do (derive-and-handle-item item parse-tree to-state to-state)))))
+ (push item (gethash orig-state (incomplete-items to-state))))))
+
+(defun handle-and-predict-incomplete-item (item state tokens)
+ (declare (optimize speed))
+ (cond ((find item (the list (gethash state (incomplete-items state)))
+ :test #'item-equal)
+ nil)
+ (t
+ (push item (gethash state (incomplete-items state)))
+ (predict item state tokens))))
(defmethod initialize-instance :after ((parser parser) &rest args)
(declare (ignore args))
@@ -424,13 +442,14 @@
(or (subtypep (target parser) sym)
(subtypep sym (target parser))))
(if (functionp (right-hand-side rule))
- (handle-incomplete-item (make-instance 'incomplete-item
- :orig-state initial-state
- :predicted-from nil
- :rule rule
- :dot-position 0
- :suffix (right-hand-side rule))
- initial-state initial-state)
+ (handle-incomplete-item
+ (make-instance 'incomplete-item
+ :orig-state initial-state
+ :predicted-from nil
+ :rule rule
+ :dot-position 0
+ :suffix (right-hand-side rule))
+ initial-state initial-state)
(potentially-handle-parse-tree
(right-hand-side rule) initial-state initial-state))))))
@@ -442,6 +461,11 @@
do (return parse-tree)))
(defun advance-parse (parser tokens state)
+ (maphash (lambda (from-state items)
+ (declare (ignore from-state))
+ (dolist (item items)
+ (predict item state tokens)))
+ (incomplete-items state))
(let ((new-state (make-instance 'parser-state :parser parser)))
(loop for token in tokens
do (potentially-handle-parse-tree token state new-state))
Index: climacs/html-syntax.lisp
diff -u climacs/html-syntax.lisp:1.29 climacs/html-syntax.lisp:1.30
--- climacs/html-syntax.lisp:1.29 Mon Apr 11 08:27:13 2005
+++ climacs/html-syntax.lisp Mon May 9 15:12:47 2005
@@ -22,6 +22,11 @@
(in-package :climacs-html-syntax)
+(define-syntax html-syntax ("HTML" (basic-syntax))
+ ((lexer :reader lexer)
+ (valid-parse :initform 1)
+ (parser)))
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; grammar classes
@@ -40,6 +45,31 @@
(defclass html-tag (html-token) ())
+(defclass html-start-tag (html-tag)
+ ((start :initarg :start)
+ (name :initarg :name)
+ (attributes :initform nil :initarg :attributes)
+ (end :initarg :end)))
+
+(defmethod display-parse-tree ((entity html-start-tag) (syntax html-syntax) pane)
+ (with-slots (start name attributes end) entity
+ (display-parse-tree start syntax pane)
+ (display-parse-tree name syntax pane)
+ (unless (null attributes)
+ (display-parse-tree attributes syntax pane))
+ (display-parse-tree end syntax pane)))
+
+(defclass html-end-tag (html-tag)
+ ((start :initarg :start)
+ (name :initarg :name)
+ (end :initarg :end)))
+
+(defmethod display-parse-tree ((entity html-end-tag) (syntax html-syntax) pane)
+ (with-slots (start name attributes end) entity
+ (display-parse-tree start syntax pane)
+ (display-parse-tree name syntax pane)
+ (display-parse-tree end syntax pane)))
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; lexer
@@ -74,19 +104,15 @@
(t
(fo) (make-instance 'delimiter))))))))
-(define-syntax html-syntax ("HTML" (basic-syntax))
- ((lexer :reader lexer)
- (valid-parse :initform 1)
- (parser)))
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; parser
(defparameter *html-grammar* (grammar))
-(defmacro add-html-rule (rule)
- `(add-rule (grammar-rule ,rule) *html-grammar*))
+(defmacro add-html-rule (rule &key predict-test)
+ `(add-rule (grammar-rule ,rule :predict-test ,predict-test)
+ *html-grammar*))
(defun word-is (word string)
(string-equal (coerce (buffer-sequence (buffer word) (start-offset word) (end-offset word)) 'string)
@@ -94,23 +120,27 @@
(defmacro define-start-tag (name string)
`(progn
- (defclass ,name (html-tag) ())
+ (defclass ,name (html-start-tag) ())
(add-html-rule
(,name -> (start-tag-start
(word (and (= (end-offset start-tag-start) (start-offset word))
(word-is word ,string)))
- (tag-end (= (end-offset word) (start-offset tag-end))))))))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start start-tag-start :name word :end tag-end))))
(defmacro define-end-tag (name string)
`(progn
- (defclass ,name (html-tag) ())
+ (defclass ,name (html-end-tag) ())
(add-html-rule
(,name -> (end-tag-start
(word (and (= (end-offset end-tag-start) (start-offset word))
(word-is word ,string)))
- (tag-end (= (end-offset word) (start-offset tag-end))))))))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start end-tag-start :name word :end tag-end)
+ :predict-test (lambda (token)
+ (typep token 'end-tag-start)))))
(defmacro define-tag-pair (start-name end-name string)
`(progn (define-start-tag ,start-name ,string)
@@ -310,7 +340,9 @@
(defclass $inline (html-nonterminal)
((contents :initarg :contents)))
-(add-html-rule ($inline -> (inline-element) :contents inline-element))
+(add-html-rule ($inline -> (inline-element) :contents inline-element)
+ :predict-test (lambda (token)
+ (typep token 'start-tag-start)))
(add-html-rule ($inline -> (word) :contents word))
(add-html-rule ($inline -> (delimiter) :contents delimiter))
@@ -326,7 +358,9 @@
((contents :initarg :contents)))
(add-html-rule ($flow -> ($inline) :contents $inline))
-(add-html-rule ($flow -> (block-level-element) :contents block-level-element))
+(add-html-rule ($flow -> (block-level-element) :contents block-level-element)
+ :predict-test (lambda (token)
+ (typep token 'start-tag-start)))
(defmethod display-parse-tree ((entity $flow) (syntax html-syntax) pane)
(with-slots (contents) entity
@@ -379,11 +413,7 @@
(define-list <a>-attributes <a>-attribute)
-(defclass <a> (html-tag)
- ((start :initarg :start)
- (name :initarg :name)
- (attributes :initarg :attributes)
- (end :initarg :end)))
+(defclass <a> (html-start-tag) ())
(add-html-rule (<a> -> (start-tag-start
(word (and (= (end-offset start-tag-start) (start-offset word))
@@ -392,13 +422,6 @@
tag-end)
:start start-tag-start :name word :attributes <a>-attributes :end tag-end))
-(defmethod display-parse-tree ((entity <a>) (syntax html-syntax) pane)
- (with-slots (start name attributes end) entity
- (display-parse-tree start syntax pane)
- (display-parse-tree name syntax pane)
- (display-parse-tree attributes syntax pane)
- (display-parse-tree end syntax pane)))
-
(define-end-tag </a> "a")
(defclass a-element (inline-element)
@@ -431,11 +454,7 @@
;;;;;;;;;;;;;;; p element
-(defclass <p> (html-tag)
- ((start :initarg :start)
- (name :initarg :name)
- (attributes :initarg :attributes)
- (end :initarg :end)))
+(defclass <p> (html-start-tag) ())
(add-html-rule (<p> -> (start-tag-start
(word (and (= (end-offset start-tag-start) (start-offset word))
@@ -444,13 +463,6 @@
tag-end)
:start start-tag-start :name word :attributes common-attributes :end tag-end))
-(defmethod display-parse-tree ((entity <p>) (syntax html-syntax) pane)
- (with-slots (start name attributes end) entity
- (display-parse-tree start syntax pane)
- (display-parse-tree name syntax pane)
- (display-parse-tree attributes syntax pane)
- (display-parse-tree end syntax pane)))
-
(define-end-tag </p> "p")
(defclass p-element (block-level-element)
@@ -469,11 +481,7 @@
;;;;;;;;;;;;;;; li element
-(defclass <li> (html-tag)
- ((start :initarg :start)
- (name :initarg :name)
- (attributes :initarg :attributes)
- (end :initarg :end)))
+(defclass <li> (html-start-tag) ())
(add-html-rule (<li> -> (start-tag-start
(word (and (= (end-offset start-tag-start) (start-offset word))
@@ -485,13 +493,6 @@
:attributes common-attributes
:end tag-end))
-(defmethod display-parse-tree ((entity <li>) (syntax html-syntax) pane)
- (with-slots (start name attributes end) entity
- (display-parse-tree start syntax pane)
- (display-parse-tree name syntax pane)
- (display-parse-tree attributes syntax pane)
- (display-parse-tree end syntax pane)))
-
(define-end-tag </li> "li")
(defclass li-element (html-nonterminal)
@@ -513,11 +514,7 @@
;;;;;;;;;;;;;;; ul element
-(defclass <ul> (html-tag)
- ((start :initarg :start)
- (name :initarg :name)
- (attributes :initarg :attributes)
- (end :initarg :end)))
+(defclass <ul> (html-start-tag) ())
(add-html-rule (<ul> -> (start-tag-start
(word (and (= (end-offset start-tag-start) (start-offset word))
@@ -529,13 +526,6 @@
:attributes common-attributes
:end tag-end))
-(defmethod display-parse-tree ((entity <ul>) (syntax html-syntax) pane)
- (with-slots (start name attributes end) entity
- (display-parse-tree start syntax pane)
- (display-parse-tree name syntax pane)
- (display-parse-tree attributes syntax pane)
- (display-parse-tree end syntax pane)))
-
(define-end-tag </ul> "ul")
(define-nonempty-list li-elements li-element)
@@ -624,11 +614,7 @@
(define-list <html>-attributes <html>-attribute)
-(defclass <html> (html-tag)
- ((start :initarg :start)
- (name :initarg :name)
- (attributes :initarg :attributes)
- (end :initarg :end)))
+(defclass <html> (html-start-tag) ())
(add-html-rule (<html> -> (start-tag-start
(word (and (= (end-offset start-tag-start) (start-offset word))
@@ -637,13 +623,6 @@
tag-end)
:start start-tag-start :name word :attributes <html>-attributes :end tag-end))
-(defmethod display-parse-tree ((entity <html>) (syntax html-syntax) pane)
- (with-slots (start name attributes end) entity
- (display-parse-tree start syntax pane)
- (display-parse-tree name syntax pane)
- (display-parse-tree attributes syntax pane)
- (display-parse-tree end syntax pane)))
-
(define-end-tag </html> "html")
(defclass html (html-nonterminal)
@@ -736,13 +715,14 @@
(when (and (end-offset entity) (mark> (end-offset entity) top))
(call-next-method))))
-(defmethod display-parse-tree ((entity html-token) (syntax html-syntax) pane)
+(defmethod display-parse-tree ((entity html-lexeme) (syntax html-syntax) pane)
(flet ((cache-test (t1 t2)
- (and (eq t1 t2)
- (eq (slot-value t1 'ink)
- (medium-ink (sheet-medium pane)))
- (eq (slot-value t1 'face)
- (text-style-face (medium-text-style (sheet-medium pane)))))))
+ (let ((result (and (eq t1 t2)
+ (eq (slot-value t1 'ink)
+ (medium-ink (sheet-medium pane)))
+ (eq (slot-value t1 'face)
+ (text-style-face (medium-text-style (sheet-medium pane)))))))
+ result)))
(updating-output (pane :unique-id entity
:id-test #'eq
:cache-value entity
@@ -761,7 +741,7 @@
(with-drawing-options (pane :ink +green4+)
(call-next-method)))
-(defmethod display-parse-tree :before ((entity html-token) (syntax html-syntax) pane)
+(defmethod display-parse-tree :before ((entity html-lexeme) (syntax html-syntax) pane)
(handle-whitespace pane (buffer pane) *white-space-start* (start-offset entity))
(setf *white-space-start* (end-offset entity)))