Update of /project/climacs/cvsroot/climacs In directory common-lisp.net:/tmp/cvs-serv23495
Modified Files: gui.lisp html-syntax.lisp packages.lisp Log Message: Improvements to HTML syntax. This syntax module now uses an incremental lexer, and and incremental parser based on the existing Earley parser in syntax.lisp.
Removed backward-to-error and forward-to-error, since I am not sure that these are what we want.
Date: Mon Feb 28 09:51:36 2005 Author: rstrandh
Index: climacs/gui.lisp diff -u climacs/gui.lisp:1.125 climacs/gui.lisp:1.126 --- climacs/gui.lisp:1.125 Sun Feb 27 19:52:01 2005 +++ climacs/gui.lisp Mon Feb 28 09:51:33 2005 @@ -1282,18 +1282,6 @@ (syntax (syntax (buffer pane)))) (end-of-paragraph point syntax)))
-(define-named-command com-backward-to-error () - (let* ((pane (current-window)) - (point (point pane)) - (syntax (syntax (buffer pane)))) - (display-message "~a" (backward-to-error point syntax)))) - -(define-named-command com-forward-to-error () - (let* ((pane (current-window)) - (point (point pane)) - (syntax (syntax (buffer pane)))) - (display-message "~a" (forward-to-error point syntax)))) - (define-named-command com-eval-expression ((insertp 'boolean :prompt "Insert?")) (let* ((*package* (find-package :climacs-gui)) (string (handler-case (accept 'string :prompt "Eval")
Index: climacs/html-syntax.lisp diff -u climacs/html-syntax.lisp:1.3 climacs/html-syntax.lisp:1.4 --- climacs/html-syntax.lisp:1.3 Sat Feb 5 07:49:53 2005 +++ climacs/html-syntax.lisp Mon Feb 28 09:51:34 2005 @@ -34,183 +34,237 @@ (and (eq (class-of t1) (class-of t2)) (< (badness t1) (badness t2))))
-(defclass html (html-sym) ()) -(defclass head (html-sym) ()) -(defclass title (html-sym) ()) -(defclass body (html-sym) ()) -(defclass h1 (html-sym) ()) -(defclass h2 (html-sym) ()) -(defclass h3 (html-sym) ()) -(defclass para (html-sym) ()) -(defclass ul (html-sym) ()) -(defclass li (html-sym) ()) -(defclass texts (html-sym) ()) - -(defclass error-token (html-sym) ()) -(defclass text (html-sym) ()) - -(defclass <html> (html-sym) ()) -(defclass </html> (html-sym) ()) -(defclass <head> (html-sym) ()) -(defclass </head> (html-sym) ()) -(defclass <title> (html-sym) ()) -(defclass </title> (html-sym) ()) -(defclass <body> (html-sym) ()) -(defclass </body> (html-sym) ()) -(defclass <h1> (html-sym) ()) -(defclass </h1> (html-sym) ()) -(defclass <h2> (html-sym) ()) -(defclass </h2> (html-sym) ()) -(defclass <h3> (html-sym) ()) -(defclass </h3> (html-sym) ()) -(defclass <p> (html-sym) ()) -(defclass </p> (html-sym) ()) -(defclass <ul> (html-sym) ()) -(defclass </ul> (html-sym) ()) -(defclass <li> (html-sym) ()) -(defclass </li> (html-sym) ()) +(defclass words (html-sym) ()) + +(defclass empty-words (words) ()) + +(defclass nonempty-words (words) + ((words :initarg :words) + (word :initarg :word))) + +(defclass html-balanced (html-sym) + ((start :initarg :start) + (end :initarg :end))) + +(defclass html (html-balanced) + ((head :initarg :head) + (body :initarg :body))) + +(defclass head (html-balanced) + ((title :initarg :title))) + +(defclass html-words (html-balanced) + ((words :initarg :words))) + +(defclass title (html-words) ()) +(defclass body (html-words) ()) +(defclass h1 (html-words) ()) +(defclass h2 (html-words) ()) +(defclass h3 (html-words) ()) +(defclass para (html-words) ()) + +(defclass html-token (html-sym) + ((start-mark :initarg :start-mark :reader start-mark) + (size :initarg :size))) + +(defgeneric end-offset (html-token)) + +(defmethod end-offset ((token html-token)) + (with-slots (start-mark size) token + (+ (offset start-mark) size))) + +(defgeneric start-offset (html-token)) + +(defmethod start-offset ((token html-token)) + (offset (start-mark token))) + +(defclass <html> (html-token) () (:default-initargs :size 6)) +(defclass </html> (html-token) ()(:default-initargs :size 7)) +(defclass <head> (html-token) () (:default-initargs :size 6)) +(defclass </head> (html-token) () (:default-initargs :size 7)) +(defclass <title> (html-token) () (:default-initargs :size 7)) +(defclass </title> (html-token) () (:default-initargs :size 8)) +(defclass <body> (html-token) () (:default-initargs :size 6)) +(defclass </body> (html-token) () (:default-initargs :size 7)) +(defclass <h1> (html-token) () (:default-initargs :size 4)) +(defclass </h1> (html-token) () (:default-initargs :size 5)) +(defclass <h2> (html-token) () (:default-initargs :size 4)) +(defclass </h2> (html-token) () (:default-initargs :size 5)) +(defclass <h3> (html-token) () (:default-initargs :size 4)) +(defclass </h3> (html-token) () (:default-initargs :size 5)) +(defclass <p> (html-token) () (:default-initargs :size 3)) +(defclass </p> (html-token) () (:default-initargs :size 4)) +(defclass <ul> (html-token) () (:default-initargs :size 4)) +(defclass </ul> (html-token) () (:default-initargs :size 5)) +(defclass <li> (html-token) () (:default-initargs :size 4)) +(defclass </li> (html-token) () (:default-initargs :size 5))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; lexer
-(defparameter *token-table* - '(("<html>" . <html>) - ("</html>" . </html>) - ("<head>" . <head>) - ("</head>" . </head>) - ("<title>" . <title>) - ("</title>" . </title>) - ("<body>" . <body>) - ("</body>" . </body>) - ("<h1>" . <h1>) - ("</h1>" . </h1>) - ("<h2>" . <h2>) - ("</h2>" . </h2>) - ("<h3>" . <h3>) - ("</h3>" . </h3>) - ("<p>" . <p>) - ("</p>" . </p>) - ("<ul>" . <ul>) - ("</ul>" . </ul>) - ("<li>" . <li>) - ("</li>" . </li>))) - -(defclass html-lexer (lexer) - ((mark :initarg :mark))) - -(defmethod lex ((lexer html-lexer)) - (with-slots (mark) lexer - (assert (not (end-of-buffer-p mark))) - (cond ((or (end-of-line-p mark) - (not (eql (object-after mark) #<))) - (when (end-of-line-p mark) - (forward-object mark)) - (loop until (or (end-of-line-p mark) - (eql (object-after mark) #<)) - do (forward-object mark)) - (make-instance 'text)) - (t - (let ((offset (offset mark))) - (forward-object mark) - (loop until (or (end-of-line-p mark) - (whitespacep (object-after mark)) - (eql (object-before mark) #>)) - do (forward-object mark)) - (let* ((word (region-to-sequence offset mark)) - (class-name (cdr (assoc word *token-table* :test #'equalp)))) - (make-instance (or class-name 'error-token)))))))) +(defclass html-element (html-token) + ((state :initarg :state))) + +(defclass start-element (html-element) ()) +(defclass tag-start (html-element) ()) +(defclass tag-end (html-element) ()) +(defclass slash (html-element) ()) +(defclass word (html-element) ()) +(defclass delimiter (html-element) ()) + +(defun next-token (scan) + (let ((start-mark (clone-mark scan))) + (flet ((fo () (forward-object scan))) + (macrolet ((make-entry (type) + `(return-from next-token + (make-instance ,type :start-mark start-mark + :size (- (offset scan) (offset start-mark)))))) + (loop with object = (object-after scan) + until (end-of-buffer-p scan) + do (case object + (#< (fo) (make-entry 'tag-start)) + (#> (fo) (make-entry 'tag-end)) + (#/ (fo) (make-entry 'slash)) + (t (cond ((alphanumericp object) + (loop until (end-of-buffer-p scan) + while (alphanumericp (object-after scan)) + do (fo)) + (make-entry 'word)) + (t + (fo) (make-entry 'delimiter)))))))))) + +(define-syntax html-syntax ("HTML" (basic-syntax)) + ((tokens :initform (make-instance 'standard-flexichain)) + (guess-pos :initform 1) + (valid-parse :initform 1) + (parser)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; parser
+(defun word-is (word string) + (string-equal (coerce (region-to-sequence (start-mark word) (end-offset word)) 'string) + string)) + (defparameter *html-grammar* (grammar - (html -> (<html> head body </html>)) - (head -> (<head> title </head>)) - (title -> (<title> texts </title>)) - (body -> (<body> texts </body>)) - (texts -> ()) - (texts -> (texts text)))) - -(define-syntax html-syntax ("HTML" (basic-syntax)) - ((parser) - (states))) + (<html> -> (tag-start + (word (and (= (end-offset tag-start) (start-offset word)) + (word-is word "html"))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start-mark (start-mark tag-start)) + (</html> -> (tag-start + (slash (= (end-offset tag-start) (start-offset slash))) + (word (and (= (end-offset slash) (start-offset word)) + (word-is word "html"))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start-mark (start-mark tag-start)) + (<head> -> (tag-start + (word (and (= (end-offset tag-start) (start-offset word)) + (word-is word "head"))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start-mark (start-mark tag-start)) + (</head> -> (tag-start + (slash (= (end-offset tag-start) (start-offset slash))) + (word (and (= (end-offset slash) (start-offset word)) + (word-is word "head"))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start-mark (start-mark tag-start)) + (<title> -> (tag-start + (word (and (= (end-offset tag-start) (start-offset word)) + (word-is word "title"))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start-mark (start-mark tag-start)) + (</title> -> (tag-start + (slash (= (end-offset tag-start) (start-offset slash))) + (word (and (= (end-offset slash) (start-offset word)) + (word-is word "title"))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start-mark (start-mark tag-start)) + (<body> -> (tag-start + (word (and (= (end-offset tag-start) (start-offset word)) + (word-is word "body"))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start-mark (start-mark tag-start)) + (</body> -> (tag-start + (slash (= (end-offset tag-start) (start-offset slash))) + (word (and (= (end-offset slash) (start-offset word)) + (word-is word "body"))) + (tag-end (= (end-offset word) (start-offset tag-end)))) + :start-mark (start-mark tag-start)) + (html -> (<html> head body </html>) + :start <html> :head head :body body :end </html>) + (head -> (<head> title </head>) + :start <head> :title title :end </head>) + (title -> (<title> words </title>) + :start <title> :words words :end </title>) + (body -> (<body> words </body>) + :start <body> :words words :end </body>) + (words -> () + (make-instance 'empty-words)) + (words -> (words word) + (make-instance 'nonempty-words :words words :word word))))
(defmethod initialize-instance :after ((syntax html-syntax) &rest args) (declare (ignore args)) - (with-slots (parser states buffer) syntax + (with-slots (parser tokens buffer) syntax (setf parser (make-instance 'parser :grammar *html-grammar* - :lexer (make-instance 'html-lexer - :mark (make-instance 'standard-left-sticky-mark :buffer buffer)) :target 'html)) - (setf states (list (cons (make-instance 'standard-left-sticky-mark :buffer buffer) - (initial-state parser)))))) + (insert* tokens 0 (make-instance 'start-element + :start-mark (make-instance 'standard-left-sticky-mark + :buffer buffer + :offset 0) + :size 0 + :state (initial-state parser))))) + +(defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot) + (with-slots (parser tokens valid-parse) syntax + (loop until (= valid-parse (nb-elements tokens)) + while (mark< (end-offset (element* tokens valid-parse)) bot) + do (let ((current-token (element* tokens (1- valid-parse))) + (next-token (element* tokens valid-parse))) + (setf (slot-value next-token 'state) + (advance-parse parser (list next-token) (slot-value current-token 'state)))) + (incf valid-parse))))
(defmethod update-syntax (buffer (syntax html-syntax)) - (let ((low-mark (low-mark buffer))) - (with-slots (parser states) syntax - (with-slots (lexer) parser - (with-slots (mark) lexer - (loop until (or (null (cdr states)) - (< (offset (caar states)) (offset low-mark))) - do (pop states)) - (setf (offset mark) (offset (caar states))) - (loop until (end-of-buffer-p mark) - do (let ((token (lex lexer))) - (push (cons (clone-mark mark) - (advance-parse parser (list token) (cdar states))) - states))))) - (print (find 'html (gethash (initial-state parser) (parse-trees (cdar states))) - :key #'type-of) - *query-io*) - (finish-output *query-io*)))) - -(defgeneric forward-to-error (point syntax)) -(defgeneric backward-to-error (point syntax)) - -(defun find-bad-parse-tree (state) - (maphash (lambda (key parse-trees) - (declare (ignore key)) - (let ((parse-tree (find-if (lambda (parse-tree) - (plusp (badness parse-tree))) - parse-trees))) - (when parse-tree - (return-from find-bad-parse-tree parse-tree)))) - (parse-trees state))) - -(defgeneric empty-state-p (state)) - -(defmethod empty-state-p (state) - (maphash (lambda (key val) - (declare (ignore key)) - (loop for parse-tree in val - do (return-from empty-state-p nil))) - (parse-trees state)) - (maphash (lambda (key val) - (declare (ignore key)) - (loop for parse-tree in val - do (return-from empty-state-p nil))) - (incomplete-items state))) - -(defmethod backward-to-error (point (syntax html-syntax)) - (let ((states (slot-value syntax 'states))) - ;; find the last state before point - (loop until (or (null states) - (mark< (caar states) point)) - do (pop states)) - (when (null states) - (return-from backward-to-error "no more errors")) - (when (empty-state-p (cdar states)) - (loop for ((m1 . s1) (m2 . s2)) on states - until (not (empty-state-p s2)) - finally (setf (offset point) (offset m1))) - (return-from backward-to-error "no valid parse from this point")) - (loop for (mark . state) in states - for tree = (find-bad-parse-tree state) - when tree - do (setf (offset point) (offset mark)) - (return (message tree)) - finally (return "no more errors")))) + (let ((low-mark (low-mark buffer)) + (high-mark (high-mark buffer)) + (scan)) + (with-slots (tokens guess-pos valid-parse) syntax + (when (mark<= low-mark high-mark) + ;; go back to a position before low-mark + (loop until (or (= guess-pos 1) + (mark< (end-offset (element* tokens (1- guess-pos))) low-mark)) + do (decf guess-pos)) + ;; go forward to the last position before low-mark + (loop with nb-elements = (nb-elements tokens) + until (or (= guess-pos nb-elements) + (mark>= (end-offset (element* tokens guess-pos)) low-mark)) + do (incf guess-pos)) + ;; mark valid parse + (setf valid-parse guess-pos) + ;; delete entries that must be reparsed + (loop until (or (= guess-pos (nb-elements tokens)) + (mark> (start-mark (element* tokens guess-pos)) high-mark)) + do (delete* tokens guess-pos)) + (setf scan (make-instance 'standard-left-sticky-mark + :buffer buffer + :offset (if (zerop guess-pos) + 0 + (end-offset (element* tokens (1- guess-pos)))))) + ;; scan + (loop with start-mark = nil + do (loop until (end-of-buffer-p scan) + while (whitespacep (object-after scan)) + do (forward-object scan)) + until (if (end-of-buffer-p high-mark) + (end-of-buffer-p scan) + (mark> scan high-mark)) + do (setf start-mark (clone-mark scan)) + (insert* tokens guess-pos (next-token scan)) + (incf guess-pos)))))) +
Index: climacs/packages.lisp diff -u climacs/packages.lisp:1.51 climacs/packages.lisp:1.52 --- climacs/packages.lisp:1.51 Sun Feb 27 19:52:01 2005 +++ climacs/packages.lisp Mon Feb 28 09:51:35 2005 @@ -91,8 +91,7 @@ #:basic-syntax #:update-syntax #:update-syntax-for-display #:syntax-line-indentation - #:beginning-of-paragraph #:end-of-paragraph - #:forward-to-error #:backward-to-error)) + #:beginning-of-paragraph #:end-of-paragraph))
(defpackage :climacs-cl-syntax (:use :clim-lisp :clim :climacs-buffer :climacs-base :flexichain :climacs-syntax)