 
            Update of /project/cxml/cvsroot/cxml/xml In directory clnet:/tmp/cvs-serv26091/xml Modified Files: package.lisp xml-parse.lisp Log Message: klacks parser --- /project/cxml/cvsroot/cxml/xml/package.lisp 2006/12/02 13:21:37 1.13 +++ /project/cxml/cvsroot/cxml/xml/package.lisp 2007/02/11 18:21:21 1.14 @@ -83,4 +83,6 @@ #:make-namespace-normalizer #:make-whitespace-normalizer #:rod-to-utf8-string - #:utf8-string-to-rod)) + #:utf8-string-to-rod + + #:make-source)) --- /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2006/09/16 07:52:59 1.64 +++ /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/02/11 18:21:22 1.65 @@ -68,11 +68,11 @@ ;; :stag (<name> . <atts>) ;start tag ;; :etag (<name> . <atts>) ;end tag ;; :ztag (<name> . <atts>) ;empty tag -;; :<!element -;; :<!entity -;; :<!attlist -;; :<!notation -;; :<!doctype +;; :<!ELEMENT +;; :<!ENTITY +;; :<!ATTLIST +;; :<!NOTATION +;; :<!DOCTYPE ;; :<![ ;; :comment <content> @@ -194,11 +194,13 @@ (defvar *expand-pe-p* nil) -(defparameter *namespace-bindings* +(defparameter *initial-namespace-bindings* '((#"" . nil) (#"xmlns" . #"http://www.w3.org/2000/xmlns/") (#"xml" . #"http://www.w3.org/XML/1998/namespace"))) +(defparameter *namespace-bindings* *initial-namespace-bindings*) + ;;;; --------------------------------------------------------------------------- ;;;; xstreams ;;;; @@ -2571,22 +2573,16 @@ :main-zstream input :entity-resolver entity-resolver :disallow-internal-subset disallow-internal-subset)) - (*validate* validate)) + (*validate* validate) + (*namespace-bindings* *initial-namespace-bindings*)) (sax:start-document handler) ;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc* ;; Misc ::= Comment | PI | S ;; xmldecl::='<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' ;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) - ;; - ;; we will use the attribute-value parser for the xml decl. (let ((*data-behaviour* :DTD)) ;; optional XMLDecl? - (cond ((eq (peek-token input) :xml-decl) - (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input)))))) - (setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes)) - (setup-encoding input hd)) - (read-token input))) - (set-full-speed input) + (p/xmldecl input) ;; Misc* (p/misc*-2 input) ;; (doctypedecl Misc*)? @@ -2595,13 +2591,7 @@ (p/doctype-decl input dtd) (p/misc*-2 input)) (dtd - (let ((dummy (string->xstream "<!DOCTYPE dummy>"))) - (setf (xstream-name dummy) - (make-stream-name - :entity-name "dummy doctype" - :entity-kind :main - :uri (zstream-base-sysid input))) - (p/doctype-decl (make-zstream :input-stack (list dummy)) dtd))) + (synthesize-doctype dtd input)) ((and validate (not dtd)) (validity-error "invalid document: no doctype"))) (ensure-dtd) @@ -2610,28 +2600,65 @@ (setf (model-stack *ctx*) (list (make-root-model root)))) ;; element (let ((*data-behaviour* :DOC)) - (when (eq (peek-token input) :seen-<) - (multiple-value-bind (c s) - (read-token-after-|<| input (car (zstream-input-stack input))) - (setf (zstream-token-category input) c - (zstream-token-semantic input) s))) + (fix-seen-< input) (p/element input)) ;; optional Misc* (p/misc*-2 input) - (unless (eq (peek-token input) :eof) - (wf-error input "Garbage at end of document.")) - (when *validate* - (maphash (lambda (k v) - (unless v - (validity-error "(11) IDREF: ~S not defined" (rod-string k)))) - (id-table *ctx*)) - - (dolist (name (referenced-notations *ctx*)) - (unless (find-notation name (dtd *ctx*)) - (validity-error "(23) Notation Declared: ~S" (rod-string name))))) + (p/eof input) (sax:end-document handler)))) +(defun synthesize-doctype (dtd input) + (let ((dummy (string->xstream "<!DOCTYPE dummy>"))) + (setf (xstream-name dummy) + (make-stream-name + :entity-name "dummy doctype" + :entity-kind :main + :uri (zstream-base-sysid input))) + (p/doctype-decl (make-zstream :input-stack (list dummy)) dtd))) + +(defun fix-seen-< (input) + (when (eq (peek-token input) :seen-<) + (multiple-value-bind (c s) + (read-token-after-|<| input (car (zstream-input-stack input))) + (setf (zstream-token-category input) c + (zstream-token-semantic input) s)))) + +(defun p/xmldecl (input) + ;; we will use the attribute-value parser for the xml decl. + (prog1 + (when (eq (peek-token input) :xml-decl) + (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input)))))) + (setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes)) + (setup-encoding input hd) + (read-token input) + hd)) + (set-full-speed input))) + +(defun p/eof (input) + (unless (eq (peek-token input) :eof) + (wf-error input "Garbage at end of document.")) + (when *validate* + (maphash (lambda (k v) + (unless v + (validity-error "(11) IDREF: ~S not defined" (rod-string k)))) + (id-table *ctx*)) + + (dolist (name (referenced-notations *ctx*)) + (unless (find-notation name (dtd *ctx*)) + (validity-error "(23) Notation Declared: ~S" (rod-string name)))))) + (defun p/element (input) + (multiple-value-bind (cat n-b new-b uri lname qname attrs) (p/sztag input) + (sax:start-element (handler *ctx*) uri lname qname attrs) + (when (eq cat :stag) + (let ((*namespace-bindings* n-b)) + (p/content input)) + (p/etag input qname)) + (sax:end-element (handler *ctx*) uri lname qname) + (undeclare-namespaces new-b) + (validate-end-element *ctx* qname))) + +(defun p/sztag (input) (multiple-value-bind (cat sem) (read-token input) (case cat ((:stag :ztag)) @@ -2657,28 +2684,39 @@ (setf attrs (remove-if (compose #'xmlns-attr-p #'sax:attribute-qname) attrs))) - (cond - ((eq cat :ztag) - (sax:start-element (handler *ctx*) uri local-name name attrs) - (sax:end-element (handler *ctx*) uri local-name name)) - - ((eq cat :stag) - (sax:start-element (handler *ctx*) uri local-name name attrs) - (p/content input) - (multiple-value-bind (cat2 sem2) (read-token input) - (unless (and (eq cat2 :etag) - (eq (car sem2) name)) - (wf-error input "Bad nesting. ~S / ~S" - (mu name) - (mu (cons cat2 sem2)))) - (when (cdr sem2) - (wf-error input "no attributes allowed in end tag"))) - (sax:end-element (handler *ctx*) uri local-name name)) - - (t - (wf-error input "Expecting element, got ~S." cat)))) - (undeclare-namespaces new-namespaces)) - (validate-end-element *ctx* name)))) + (values cat + *namespace-bindings* + new-namespaces + uri local-name name attrs)))))) + +(defun p/etag (input qname) + (multiple-value-bind (cat2 sem2) (read-token input) + (unless (and (eq cat2 :etag) + (eq (car sem2) qname)) + (wf-error input "Bad nesting. ~S / ~S" + (mu qname) + (mu (cons cat2 sem2)))) + (when (cdr sem2) + (wf-error input "no attributes allowed in end tag")))) + +(defun process-characters (input sem) + (consume-token input) + (when (search #"]]>" sem) + (wf-error input "']]>' not allowed in CharData")) + (validate-characters *ctx* sem)) + +(defun process-cdata-section (input) + (consume-token input) + (let ((input (car (zstream-input-stack input)))) + (unless (and (rune= #/C (read-rune input)) + (rune= #/D (read-rune input)) + (rune= #/A (read-rune input)) + (rune= #/T (read-rune input)) + (rune= #/A (read-rune input)) + (rune= #/\[ (read-rune input))) + (wf-error input "After '<![', 'CDATA[' is expected.")) + (validate-characters *ctx* #"hack") ;anything other than whitespace + (read-cdata-sect input))) (defun p/content (input) ;; [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* @@ -2688,10 +2726,7 @@ (p/element input) (p/content input)) ((:CDATA) - (consume-token input) - (when (search #"]]>" sem) - (wf-error input "']]>' not allowed in CharData")) - (validate-characters *ctx* sem) + (process-characters input sem) (sax:characters (handler *ctx*) sem) (p/content input)) ((:ENTITY-REF) @@ -2709,21 +2744,11 @@ (peek-token input)))))) (p/content input)))) ((:<!\[) - (consume-token input) - (cons - (let ((input (car (zstream-input-stack input)))) - (unless (and (rune= #/C (read-rune input)) - (rune= #/D (read-rune input)) - (rune= #/A (read-rune input)) - (rune= #/T (read-rune input)) - (rune= #/A (read-rune input)) - (rune= #/\[ (read-rune input))) - (wf-error input "After '<![', 'CDATA[' is expected.")) - (validate-characters *ctx* #"hack") ;anything other than whitespace - (sax:start-cdata (handler *ctx*)) - (sax:characters (handler *ctx*) (read-cdata-sect input)) - (sax:end-cdata (handler *ctx*))) - (p/content input))) + (let ((data (process-cdata-section input))) + (sax:start-cdata (handler *ctx*)) + (sax:characters (handler *ctx*) data) + (sax:end-cdata (handler *ctx*))) + (p/content input)) ((:PI) (consume-token input) (sax:processing-instruction (handler *ctx*) (car sem) (cdr sem))