Author: hhubner Date: 2006-02-18 03:34:15 -0600 (Sat, 18 Feb 2006) New Revision: 1845
Added: vendor/ vendor/cxml/ vendor/cxml/CVS/ vendor/cxml/CVS/Entries vendor/cxml/CVS/Entries.Log vendor/cxml/CVS/Repository vendor/cxml/CVS/Root vendor/cxml/CVS/Template vendor/cxml/catalog.lisp vendor/cxml/characters.lisp vendor/cxml/package.lisp vendor/cxml/recoder.lisp vendor/cxml/sax-handler.lisp vendor/cxml/sax-proxy.lisp vendor/cxml/sax-tests/ vendor/cxml/sax-tests/CVS/ vendor/cxml/sax-tests/CVS/Entries vendor/cxml/sax-tests/CVS/Repository vendor/cxml/sax-tests/CVS/Root vendor/cxml/sax-tests/CVS/Template vendor/cxml/sax-tests/event-collecting-handler.lisp vendor/cxml/sax-tests/package.lisp vendor/cxml/sax-tests/tests.lisp vendor/cxml/space-normalizer.lisp vendor/cxml/split-sequence.lisp vendor/cxml/unparse.lisp vendor/cxml/util.lisp vendor/cxml/xml-name-rune-p.lisp vendor/cxml/xml-parse.lisp vendor/cxml/xmlns-normalizer.lisp vendor/cxml/xmls-compat.lisp Log: importing current cxml
Added: vendor/cxml/CVS/Entries =================================================================== --- vendor/cxml/CVS/Entries 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/CVS/Entries 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,15 @@ +/catalog.lisp/1.4/Mon Jan 23 21:49:42 2006// +/characters.lisp/1.2/Mon Nov 28 22:33:47 2005// +/package.lisp/1.11/Thu Dec 29 00:31:36 2005// +/recoder.lisp/1.5/Thu Dec 29 00:31:36 2005// +/sax-handler.lisp/1.4/Thu Dec 29 00:31:36 2005// +/sax-proxy.lisp/1.4/Thu Dec 29 00:31:36 2005// +/space-normalizer.lisp/1.2/Thu Dec 29 00:39:25 2005// +/split-sequence.lisp/1.1.1.1/Sun Mar 13 18:02:35 2005// +/unparse.lisp/1.9/Fri Feb 17 12:53:19 2006// +/util.lisp/1.2/Mon Nov 28 22:33:47 2005// +/xml-name-rune-p.lisp/1.6/Mon Nov 28 22:33:47 2005// +/xml-parse.lisp/1.59/Mon Jan 23 21:45:48 2006// +/xmlns-normalizer.lisp/1.2/Tue Dec 27 20:01:32 2005// +/xmls-compat.lisp/1.2/Mon Nov 28 22:33:47 2005// +D
Added: vendor/cxml/CVS/Entries.Log =================================================================== --- vendor/cxml/CVS/Entries.Log 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/CVS/Entries.Log 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1 @@ +A D/sax-tests////
Added: vendor/cxml/CVS/Repository =================================================================== --- vendor/cxml/CVS/Repository 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/CVS/Repository 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1 @@ +cxml/xml
Added: vendor/cxml/CVS/Root =================================================================== --- vendor/cxml/CVS/Root 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/CVS/Root 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1 @@ +:pserver:anonymous@common-lisp.net:/project/cxml/cvsroot
Added: vendor/cxml/CVS/Template ===================================================================
Added: vendor/cxml/catalog.lisp =================================================================== --- vendor/cxml/catalog.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/catalog.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,348 @@ +;;;; catalogs.lisp -- XML Catalogs -*- Mode: Lisp; readtable: runes -*- +;;;; +;;;; This file is part of the CXML parser, released under Lisp-LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Developed 2004 for headcraft - http://headcraft.de/ +;;;; Copyright: David Lichteblau + +(in-package :cxml) + +;;; http://www.oasis-open.org/committees/entity/spec.html +;;; +;;; Bugs: +;;; - We validate using the Catalog DTD while parsing, which is too strict +;;; and will will fail to parse files using other parser's extensions. +;;; (Jedenfalls behauptet das die Spec.) +;;; A long-term solution might be an XML Schema validator. + +(defvar *prefer* :public) +(defvar *default-catalog* + '(;; libxml standard + "/etc/xml/catalog" + ;; FreeBSD + "/usr/local/share/xml/catalog.ports")) + +(defstruct (catalog (:constructor %make-catalog ())) + main-files + (dtd-cache (make-dtd-cache)) + (file-table (puri:make-uri-space))) + +(defstruct (entry-file (:conc-name "")) + (system-entries) ;extid 2 + (rewrite-system-entries) ; 3 + (delegate-system-entries) ; 4 + (public-entries) ; 5 + (delegate-public-entries) ; 6 + (uri-entries) ;uri 2 + (rewrite-uri-entries) ; 3 + (delegate-uri-entries) ; 4 + (next-catalog-entries) ; 5/7 + ) + +(defun starts-with-p (string prefix) + (let ((mismatch (mismatch string prefix))) + (or (null mismatch) (= mismatch (length prefix))))) + +(defun normalize-public (str) + (setf str (rod-to-utf8-string (rod str))) + (flet ((whitespacep (c) + (find c #.(map 'string #'code-char '(#x9 #xa #xd #x20))))) + (let ((start (position-if-not #'whitespacep str)) + (end (position-if-not #'whitespacep str :from-end t)) + (spacep nil)) + (with-output-to-string (out) + (when start + (loop for i from start to end do + (let ((c (char str i))) + (cond + ((whitespacep c) + (unless spacep + (setf spacep t) + (write-char #\space out))) + (t + (setf spacep nil) + (write-char c out)))))))))) + +(defun normalize-uri (str) + (when (typep str 'puri:uri) + (setf str (puri:render-uri str nil))) + (setf str (rod-to-utf8-string (rod str))) + (with-output-to-string (out) + (loop for ch across str do + (let ((c (char-code ch))) + (if (< c 15) + (write-string (string-upcase (format nil "%~2,'0X" c)) out) + (write-char ch out)))))) + +(defun unwrap-publicid (str) + (normalize-public + (with-output-to-string (out) + (let ((i (length "urn:publicid:")) + (n (length str))) + (while (< i n) + (let ((c (char str i))) + (case c + (#+ (write-char #\space out)) + (#: (write-string "//" out)) + (#; (write-string "::" out)) + (#% + (let ((code + (parse-integer str + :start (+ i 1) + :end (+ i 3) + :radix 16))) + (write-char (code-char code) out)) + (incf i 2)) + (t (write-char c out)))) + (incf i)))))) + +(defun match-exact (key table &optional check-prefer) + (dolist (pair table) + (destructuring-bind (from to &optional prefer) pair + (when (and (equal key from) (or (not check-prefer) (eq prefer :public))) + (return to))))) + +(defun match-prefix/rewrite (key table &optional check-prefer) + (let ((match nil) + (match-length -1)) + (dolist (pair table) + (destructuring-bind (from to &optional prefer) pair + (when (and (or (not check-prefer) (eq prefer :public)) + (starts-with-p key from) + (> (length from) match-length)) + (setf match-length (length from)) + (setf match to)))) + (if match + (concatenate 'string + match + (subseq key match-length)) + nil))) + +(defun match-prefix/sorted (key table &optional check-prefer) + (let ((result '())) + (dolist (pair table) + (destructuring-bind (from to &optional prefer) pair + (when (and (or (not check-prefer) (eq prefer :public)) + (starts-with-p key from)) + (push (cons (length from) to) result)))) + (mapcar #'cdr (sort result #'> :key #'car)))) + +(defun resolve-extid (public system catalog) + (when public (setf public (normalize-public public))) + (when system (setf system (normalize-uri system))) + (when (and system (starts-with-p system "urn:publicid:")) + (let ((new-public (unwrap-publicid system))) + (assert (or (null public) (equal public new-public))) + (setf public new-public + system nil))) + (let ((files (catalog-main-files catalog)) + (seen '())) + (while files + (let ((file (pop files)) + (delegates nil)) + (unless (typep file 'entry-file) + (setf file (find-catalog-file file catalog))) + (unless (or (null file) (member file seen)) + (push file seen) + (when system + (let ((result + (or (match-exact system (system-entries file)) + (match-prefix/rewrite + system + (rewrite-system-entries file))))) + (when result + (return result)) + (setf delegates + (match-prefix/sorted + system + (delegate-system-entries file))))) + (when (and public (not delegates)) + (let* ((check-prefer (and system t)) + (result + (match-exact public + (public-entries file) + check-prefer))) + (when result + (return result)) + (setf delegates + (match-prefix/sorted + public + (delegate-public-entries file) + check-prefer)))) + (if delegates + (setf files delegates) + (setf files (append (next-catalog-entries file) files)))))))) + +(defun resolve-uri (uri catalog) + (setf uri (normalize-uri uri)) + (when (starts-with-p uri "urn:publicid:") + (return-from resolve-uri + (resolve-extid (unwrap-publicid uri) nil catalog))) + (let ((files (catalog-main-files catalog)) + (seen '())) + (while files + (let ((file (pop files))) + (unless (typep file 'entry-file) + (setf file (find-catalog-file file catalog))) + (unless (or (null file) (member file seen)) + (push file seen) + (let ((result + (or (match-exact uri (uri-entries file)) + (match-prefix/rewrite uri (rewrite-uri-entries file))))) + (when result + (return result)) + (let* ((delegate-entries + (delegate-uri-entries file)) + (delegates + (match-prefix/sorted uri delegate-entries))) + (if delegates + (setf files delegates) + (setf files (append (next-catalog-entries file) files)))))))))) + +(defun find-catalog-file (uri catalog) + (setf uri (if (stringp uri) (safe-parse-uri uri) uri)) + (let* ((*dtd-cache* (catalog-dtd-cache catalog)) + (*cache-all-dtds* t) + (file (parse-catalog-file uri))) + (when file + (let ((interned (puri:intern-uri uri (catalog-file-table catalog)))) + (setf (getf (puri:uri-plist interned) 'catalog) file))) + file)) + +(defun make-catalog (&optional (uris *default-catalog*)) + (let ((result (%make-catalog))) + (setf (catalog-main-files result) + (loop + for uri in uris + for file = (find-catalog-file uri result) + when file collect file)) + result)) + +(defun parse-catalog-file (uri) + (handler-case + (parse-catalog-file/strict uri) + ((or file-error xml-parse-error) (c) + (warn "ignoring catalog error: ~A" c)))) + +(defparameter *catalog-dtd* + (let* ((cxml + (slot-value (asdf:find-system :cxml) 'asdf::relative-pathname)) + (dtd (merge-pathnames "catalog.dtd" cxml))) + (with-open-file (s dtd :element-type '(unsigned-byte 8)) + (let ((bytes + (make-array (file-length s) :element-type '(unsigned-byte 8)))) + (read-sequence bytes s) + bytes)))) + +(defun parse-catalog-file/strict (uri) + (let* ((*catalog* nil) + (dtd-sysid + (puri:parse-uri "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd"))) + (flet ((entity-resolver (public system) + (declare (ignore public)) + (if (puri:uri= system dtd-sysid) + (make-octet-input-stream *catalog-dtd*) + nil))) + (with-open-stream (s (open (uri-to-pathname uri) + :element-type '(unsigned-byte 8) + :direction :input)) + (parse-stream s + (make-instance 'catalog-parser :uri uri) + :validate nil + :dtd (make-extid nil dtd-sysid) + :root #"catalog" + :entity-resolver #'entity-resolver))))) + +(defclass catalog-parser () + ((result :initform (make-entry-file) :accessor result) + (next :initform '() :accessor next) + (prefer-stack :initform (list *prefer*) :accessor prefer-stack) + (base-stack :accessor base-stack))) + +(defmethod initialize-instance :after + ((instance catalog-parser) &key uri) + (setf (base-stack instance) (list uri))) + +(defmethod prefer ((handler catalog-parser)) + (car (prefer-stack handler))) + +(defmethod base ((handler catalog-parser)) + (car (base-stack handler))) + +(defun get-attribute/lname (name attributes) + (let ((a (find name attributes + :key (lambda (a) + (or (sax:attribute-local-name a) + (sax:attribute-qname a))) + :test #'string=))) + (and a (sax:attribute-value a)))) + +(defmethod sax:start-element ((handler catalog-parser) uri lname qname attrs) + (declare (ignore uri)) + (setf lname (or lname qname)) + ;; we can dispatch on lnames only because we validate against the DTD, + ;; which disallows other namespaces. + (push (let ((new (get-attribute/lname "prefer" attrs))) + (cond + ((equal new "public") :public) + ((equal new "system") :system) + ((null new) (prefer handler)))) + (prefer-stack handler)) + (push (string-or (get-attribute/lname "base" attrs) (base handler)) + (base-stack handler)) + (flet ((geturi (lname) + (puri:merge-uris + (safe-parse-uri (get-attribute/lname lname attrs)) + (base handler)))) + (cond + ((string= lname "public") + (push (list (normalize-public (get-attribute/lname "publicId" attrs)) + (geturi "uri") + (prefer handler)) + (public-entries (result handler)))) + ((string= lname "system") + (push (list (normalize-uri (get-attribute/lname "systemId" attrs)) + (geturi "uri")) + (system-entries (result handler)))) + ((string= lname "uri") + (push (list (normalize-uri (get-attribute/lname "name" attrs)) + (geturi "uri")) + (uri-entries (result handler)))) + ((string= lname "rewriteSystem") + (push (list (normalize-uri + (get-attribute/lname "systemIdStartString" attrs)) + (get-attribute/lname "rewritePrefix" attrs)) + (rewrite-system-entries (result handler)))) + ((string= lname "rewriteURI") + (push (list (normalize-uri + (get-attribute/lname "uriStartString" attrs)) + (get-attribute/lname "rewritePrefix" attrs)) + (rewrite-uri-entries (result handler)))) + ((string= lname "delegatePublic") + (push (list (normalize-public + (get-attribute/lname "publicIdStartString" attrs)) + (geturi "catalog") + (prefer handler)) + (delegate-public-entries (result handler)))) + ((string= lname "delegateSystem") + (push (list (normalize-uri + (get-attribute/lname "systemIdStartString" attrs)) + (geturi "catalog")) + (delegate-system-entries (result handler)))) + ((string= lname "delegateURI") + (push (list (normalize-uri + (get-attribute/lname "uriStartString" attrs)) + (geturi "catalog")) + (delegate-uri-entries (result handler)))) + ((string= lname "nextCatalog") + (push (geturi "catalog") + (next-catalog-entries (result handler))))))) + +(defmethod sax:end-element ((handler catalog-parser) uri lname qname) + (declare (ignore uri lname qname)) + (pop (base-stack handler)) + (pop (prefer-stack handler))) + +(defmethod sax:end-document ((handler catalog-parser)) + (result handler))
Added: vendor/cxml/characters.lisp =================================================================== --- vendor/cxml/characters.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/characters.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,145 @@ +;;;; characters.lisp -- character class definitions +;;;; +;;;; This file is part of the CXML parser, released under Lisp-LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Author: David Lichteblau +;;;; Copyright (C) 2004 knowledgeTools Int. GmbH + +;;; XXX xml-name-rune-p.lisp habe ich erst nach dem Schreiben dieses +;;; Files gefunden... + +;;; XXX wird derzeit in DOM:CREATE-ATTRIBUTE verwendet. Muesste aber +;;; wohl vom Parser auch schon geprueft werden (oder tut der das +;;; schon?). Vorher sollte man allerdings die Geschwindigkeit der Sache +;;; mal untersuchen. + +(in-package :cxml) + +(defparameter *base-char-ranges* + #((#x0041 #x005A) (#x0061 #x007A) (#x00C0 #x00D6) (#x00D8 #x00F6) + (#x00F8 #x00FF) (#x0100 #x0131) (#x0134 #x013E) (#x0141 #x0148) + (#x014A #x017E) (#x0180 #x01C3) (#x01CD #x01F0) (#x01F4 #x01F5) + (#x01FA #x0217) (#x0250 #x02A8) (#x02BB #x02C1) (#x0386 #x0386) + (#x0388 #x038A) (#x038C #x038C) (#x038E #x03A1) (#x03A3 #x03CE) + (#x03D0 #x03D6) (#x03DA #x03DA) (#x03DC #x03DC) (#x03DE #x03DE) + (#x03E0 #x03E0) (#x03E2 #x03F3) (#x0401 #x040C) (#x040E #x044F) + (#x0451 #x045C) (#x045E #x0481) (#x0490 #x04C4) (#x04C7 #x04C8) + (#x04CB #x04CC) (#x04D0 #x04EB) (#x04EE #x04F5) (#x04F8 #x04F9) + (#x0531 #x0556) (#x0559 #x0559) (#x0561 #x0586) (#x05D0 #x05EA) + (#x05F0 #x05F2) (#x0621 #x063A) (#x0641 #x064A) (#x0671 #x06B7) + (#x06BA #x06BE) (#x06C0 #x06CE) (#x06D0 #x06D3) (#x06D5 #x06D5) + (#x06E5 #x06E6) (#x0905 #x0939) (#x093D #x093D) (#x0958 #x0961) + (#x0985 #x098C) (#x098F #x0990) (#x0993 #x09A8) (#x09AA #x09B0) + (#x09B2 #x09B2) (#x09B6 #x09B9) (#x09DC #x09DD) (#x09DF #x09E1) + (#x09F0 #x09F1) (#x0A05 #x0A0A) (#x0A0F #x0A10) (#x0A13 #x0A28) + (#x0A2A #x0A30) (#x0A32 #x0A33) (#x0A35 #x0A36) (#x0A38 #x0A39) + (#x0A59 #x0A5C) (#x0A5E #x0A5E) (#x0A72 #x0A74) (#x0A85 #x0A8B) + (#x0A8D #x0A8D) (#x0A8F #x0A91) (#x0A93 #x0AA8) (#x0AAA #x0AB0) + (#x0AB2 #x0AB3) (#x0AB5 #x0AB9) (#x0ABD #x0ABD) (#x0AE0 #x0AE0) + (#x0B05 #x0B0C) (#x0B0F #x0B10) (#x0B13 #x0B28) (#x0B2A #x0B30) + (#x0B32 #x0B33) (#x0B36 #x0B39) (#x0B3D #x0B3D) (#x0B5C #x0B5D) + (#x0B5F #x0B61) (#x0B85 #x0B8A) (#x0B8E #x0B90) (#x0B92 #x0B95) + (#x0B99 #x0B9A) (#x0B9C #x0B9C) (#x0B9E #x0B9F) (#x0BA3 #x0BA4) + (#x0BA8 #x0BAA) (#x0BAE #x0BB5) (#x0BB7 #x0BB9) (#x0C05 #x0C0C) + (#x0C0E #x0C10) (#x0C12 #x0C28) (#x0C2A #x0C33) (#x0C35 #x0C39) + (#x0C60 #x0C61) (#x0C85 #x0C8C) (#x0C8E #x0C90) (#x0C92 #x0CA8) + (#x0CAA #x0CB3) (#x0CB5 #x0CB9) (#x0CDE #x0CDE) (#x0CE0 #x0CE1) + (#x0D05 #x0D0C) (#x0D0E #x0D10) (#x0D12 #x0D28) (#x0D2A #x0D39) + (#x0D60 #x0D61) (#x0E01 #x0E2E) (#x0E30 #x0E30) (#x0E32 #x0E33) + (#x0E40 #x0E45) (#x0E81 #x0E82) (#x0E84 #x0E84) (#x0E87 #x0E88) + (#x0E8A #x0E8A) (#x0E8D #x0E8D) (#x0E94 #x0E97) (#x0E99 #x0E9F) + (#x0EA1 #x0EA3) (#x0EA5 #x0EA5) (#x0EA7 #x0EA7) (#x0EAA #x0EAB) + (#x0EAD #x0EAE) (#x0EB0 #x0EB0) (#x0EB2 #x0EB3) (#x0EBD #x0EBD) + (#x0EC0 #x0EC4) (#x0F40 #x0F47) (#x0F49 #x0F69) (#x10A0 #x10C5) + (#x10D0 #x10F6) (#x1100 #x1100) (#x1102 #x1103) (#x1105 #x1107) + (#x1109 #x1109) (#x110B #x110C) (#x110E #x1112) (#x113C #x113C) + (#x113E #x113E) (#x1140 #x1140) (#x114C #x114C) (#x114E #x114E) + (#x1150 #x1150) (#x1154 #x1155) (#x1159 #x1159) (#x115F #x1161) + (#x1163 #x1163) (#x1165 #x1165) (#x1167 #x1167) (#x1169 #x1169) + (#x116D #x116E) (#x1172 #x1173) (#x1175 #x1175) (#x119E #x119E) + (#x11A8 #x11A8) (#x11AB #x11AB) (#x11AE #x11AF) (#x11B7 #x11B8) + (#x11BA #x11BA) (#x11BC #x11C2) (#x11EB #x11EB) (#x11F0 #x11F0) + (#x11F9 #x11F9) (#x1E00 #x1E9B) (#x1EA0 #x1EF9) (#x1F00 #x1F15) + (#x1F18 #x1F1D) (#x1F20 #x1F45) (#x1F48 #x1F4D) (#x1F50 #x1F57) + (#x1F59 #x1F59) (#x1F5B #x1F5B) (#x1F5D #x1F5D) (#x1F5F #x1F7D) + (#x1F80 #x1FB4) (#x1FB6 #x1FBC) (#x1FBE #x1FBE) (#x1FC2 #x1FC4) + (#x1FC6 #x1FCC) (#x1FD0 #x1FD3) (#x1FD6 #x1FDB) (#x1FE0 #x1FEC) + (#x1FF2 #x1FF4) (#x1FF6 #x1FFC) (#x2126 #x2126) (#x212A #x212B) + (#x212E #x212E) (#x2180 #x2182) (#x3041 #x3094) (#x30A1 #x30FA) + (#x3105 #x312C) (#xAC00 #xD7A3))) + +(defparameter *ideographic-ranges* + #((#x3007 #x3007) (#x3021 #x3029)(#x4E00 #x9FA5))) + +(defparameter *combining-char-ranges* + #((#x0300 #x0345) (#x0360 #x0361) (#x0483 #x0486) (#x0591 #x05A1) + (#x05A3 #x05B9) (#x05BB #x05BD) (#x05BF #x05BF) (#x05C1 #x05C2) + (#x05C4 #x05C4) (#x064B #x0652) (#x0670 #x0670) (#x06D6 #x06DC) + (#x06DD #x06DF) (#x06E0 #x06E4) (#x06E7 #x06E8) (#x06EA #x06ED) + (#x0901 #x0903) (#x093C #x093C) (#x093E #x094C) (#x094D #x094D) + (#x0951 #x0954) (#x0962 #x0963) (#x0981 #x0983) (#x09BC #x09BC) + (#x09BE #x09BE) (#x09BF #x09BF) (#x09C0 #x09C4) (#x09C7 #x09C8) + (#x09CB #x09CD) (#x09D7 #x09D7) (#x09E2 #x09E3) (#x0A02 #x0A02) + (#x0A3C #x0A3C) (#x0A3E #x0A3E) (#x0A3F #x0A3F) (#x0A40 #x0A42) + (#x0A47 #x0A48) (#x0A4B #x0A4D) (#x0A70 #x0A71) (#x0A81 #x0A83) + (#x0ABC #x0ABC) (#x0ABE #x0AC5) (#x0AC7 #x0AC9) (#x0ACB #x0ACD) + (#x0B01 #x0B03) (#x0B3C #x0B3C) (#x0B3E #x0B43) (#x0B47 #x0B48) + (#x0B4B #x0B4D) (#x0B56 #x0B57) (#x0B82 #x0B83) (#x0BBE #x0BC2) + (#x0BC6 #x0BC8) (#x0BCA #x0BCD) (#x0BD7 #x0BD7) (#x0C01 #x0C03) + (#x0C3E #x0C44) (#x0C46 #x0C48) (#x0C4A #x0C4D) (#x0C55 #x0C56) + (#x0C82 #x0C83) (#x0CBE #x0CC4) (#x0CC6 #x0CC8) (#x0CCA #x0CCD) + (#x0CD5 #x0CD6) (#x0D02 #x0D03) (#x0D3E #x0D43) (#x0D46 #x0D48) + (#x0D4A #x0D4D) (#x0D57 #x0D57) (#x0E31 #x0E31) (#x0E34 #x0E3A) + (#x0E47 #x0E4E) (#x0EB1 #x0EB1) (#x0EB4 #x0EB9) (#x0EBB #x0EBC) + (#x0EC8 #x0ECD) (#x0F18 #x0F19) (#x0F35 #x0F35) (#x0F37 #x0F37) + (#x0F39 #x0F39) (#x0F3E #x0F3E) (#x0F3F #x0F3F) (#x0F71 #x0F84) + (#x0F86 #x0F8B) (#x0F90 #x0F95) (#x0F97 #x0F97) (#x0F99 #x0FAD) + (#x0FB1 #x0FB7) (#x0FB9 #x0FB9) (#x20D0 #x20DC) (#x20E1 #x20E1) + (#x302A #x302F) (#x3099 #x3099) (#x309A #x309A))) + +(defparameter *digit-ranges* + #((#x0030 #x0039) (#x0660 #x0669) (#x06F0 #x06F9) (#x0966 #x096F) + (#x09E6 #x09EF) (#x0A66 #x0A6F) (#x0AE6 #x0AEF) (#x0B66 #x0B6F) + (#x0BE7 #x0BEF) (#x0C66 #x0C6F) (#x0CE6 #x0CEF) (#x0D66 #x0D6F) + (#x0E50 #x0E59) (#x0ED0 #x0ED9) (#x0F20 #x0F29))) + +(defparameter *extender-ranges* + #((#x00B7 #x00B7) (#x02D0 #x02D0) (#x02D1 #x02D1) (#x0387 #x0387) + (#x0640 #x0640) (#x0E46 #x0E46) (#x0EC6 #x0EC6) (#x3005 #x3005) + (#x3031 #x3035) (#x309D #x309E) (#x30FC #x30FE))) + +(defun valid-name-p (rod) + (and (not (zerop (length rod))) + (let ((initial (elt rod 0))) + (or (rune-in-range-p initial *base-char-ranges*) + (rune-in-range-p initial *ideographic-ranges*) + (rune= initial #/_) + (eql initial #/:))) + (every #'rune-name-char-p rod))) + +(defun valid-nmtoken-p (rod) + (and (not (zerop (length rod))) + (every #'rune-name-char-p rod))) + +(defun rune-name-char-p (rune) + (or (rune-in-range-p rune *base-char-ranges*) + (rune-in-range-p rune *ideographic-ranges*) + (rune-in-range-p rune *digit-ranges*) + (eql rune #/.) + (eql rune #/-) + (eql rune #/_) + (eql rune #/:) + (rune-in-range-p rune *combining-char-ranges*) + (rune-in-range-p rune *extender-ranges*))) + +(defun rune-in-range-p (rune range) + ;; XXX FIXME, das geht doch besser + (let ((code (rune-code rune))) + (block nil + (map nil (lambda (range) + (when (< code (car range)) + (return nil)) + (when (<= code (cadr range)) + (return t))) + range))))
Added: vendor/cxml/package.lisp =================================================================== --- vendor/cxml/package.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/package.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,82 @@ +;;;; package.lisp -- Paketdefinition +;;;; +;;;; This file is part of the CXML parser, released under Lisp-LGPL. +;;;; See file COPYING for details. + +(in-package :cl-user) + +(defpackage :cxml + (:use :cl :runes :runes-encoding :trivial-gray-streams) + (:export + ;; xstreams + #:make-xstream + #:make-rod-xstream + #:close-xstream + #:read-rune + #:peek-rune + #:unread-rune + #:fread-rune + #:fpeek-rune + #:xstream-position + #:xstream-line-number + #:xstream-column-number + #:xstream-plist + #:xstream-encoding + + ;; xstream controller protocol + #:read-octects + #:xstream/close + + #:attribute-namespace-uri + #:attribute-local-name + #:attribute-qname + #:attribute-value + + #:parse-file + #:parse-stream + #:parse-rod + #:parse-octets + + #:make-octet-vector-sink + #:make-octet-stream-sink + #:make-rod-sink + #+rune-is-character #:make-string-sink + #+rune-is-character #:make-character-stream-sink + #-rune-is-character #:make-string-sink/utf8 + #-rune-is-character #:make-character-stream-sink/utf8 + + #:with-xml-output + #:with-element + #:attribute + #:cdata + #:text + + #:xml-parse-error + #:well-formedness-violation + #:validity-error + + #:parse-dtd-file + #:parse-dtd-stream + #:make-validator + + #:*cache-all-dtds* + #:*dtd-cache* + #:getdtd + #:remdtd + #:make-dtd-cache + #:clear-dtd-cache + #:make-extid + + #:*catalog* + #:*prefer* + #:make-catalog + #:resolve-uri + #:resolve-extid + + #:make-recoder + #:sax-proxy + #:proxy-chained-handler + #:make-namespace-normalizer + #:make-whitespace-normalizer + #:rod-to-utf8-string + #:utf8-string-to-rod))
Added: vendor/cxml/recoder.lisp =================================================================== --- vendor/cxml/recoder.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/recoder.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,125 @@ +;;;; recoder.lisp -- SAX handler for string conversion +;;;; +;;;; This file is part of the CXML parser, released under Lisp-LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Developed 2004 for headcraft - http://headcraft.de/ +;;;; Copyright: David Lichteblau + +(in-package :cxml) + +(defclass recoder () + ((recoder :initarg :recoder :accessor recoder) + (chained-handler :initarg :chained-handler :accessor chained-handler))) + +(defun make-recoder (chained-handler recoder-fn) + (make-instance 'recoder + :recoder recoder-fn + :chained-handler chained-handler)) + +(macrolet ((%string (rod) + `(let ((rod ,rod)) + (if (typep rod '(or rod string)) + (funcall (recoder handler) rod) + rod))) + (defwrapper (name (&rest args) &rest forms) + `(defmethod ,name ((handler recoder) ,@args) + (,name (chained-handler handler) ,@forms)))) + (defwrapper sax:start-document ()) + + (defwrapper sax:start-element + (namespace-uri local-name qname attributes) + (%string namespace-uri) + (%string local-name) + (%string qname) + (mapcar (lambda (attr) + (sax:make-attribute + :namespace-uri (%string (sax:attribute-namespace-uri attr)) + :local-name (%string (sax:attribute-local-name attr)) + :qname (%string (sax:attribute-qname attr)) + :value (%string (sax:attribute-value attr)) + :specified-p (sax:attribute-specified-p attr))) + attributes)) + + (defwrapper sax:start-prefix-mapping (prefix uri) + (%string prefix) + (%string uri)) + + (defwrapper sax:characters (data) + (%string data)) + + (defwrapper sax:processing-instruction (target data) + (%string target) + (%string data)) + + (defwrapper sax:end-prefix-mapping (prefix) + (%string prefix)) + + (defwrapper sax:end-element (namespace-uri local-name qname) + (%string namespace-uri) + (%string local-name) + (%string qname)) + + (defwrapper sax:end-document ()) + + (defwrapper sax:comment (data) + (%string data)) + + (defwrapper sax:start-cdata ()) + + (defwrapper sax:end-cdata ()) + + (defwrapper sax:start-dtd (name public-id system-id) + (%string name) + (%string public-id) + (%string system-id)) + + (defwrapper sax:start-internal-subset ()) + (defwrapper sax:end-internal-subset ()) + + (defwrapper sax:end-dtd ()) + + (defwrapper sax:unparsed-entity-declaration + (name public-id system-id notation-name) + (%string name) + (%string public-id) + (%string system-id) + (%string notation-name)) + + (defwrapper sax:external-entity-declaration + (kind name public-id system-id) + (%string kind) + (%string name) + (%string public-id) + (%string system-id)) + + (defwrapper sax:internal-entity-declaration + (kind name value) + kind + (%string name) + (%string value)) + + (defwrapper sax:notation-declaration + (name public-id system-id) + (%string name) + (%string public-id) + (%string system-id)) + + (defwrapper sax:element-declaration (name model) + (%string name) + model) + + (defwrapper sax:attribute-declaration + (element-name attribute-name type default) + (%string element-name) + (%string attribute-name) + (%string type) + (%string default)) + + (defwrapper sax:entity-resolver + (resolver) + resolver) + + (defwrapper sax::dtd + (dtd) + dtd))
Added: vendor/cxml/sax-handler.lisp =================================================================== --- vendor/cxml/sax-handler.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/sax-handler.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,354 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: SAX; readtable: runes; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: A SAX2-like API for the xml parser +;;; Created: 2003-06-30 +;;; Author: Henrik Motakef hmot@henrik-motakef.de +;;; Author: David Lichteblau (DTD-related changes) +;;; License: BSD +;;; --------------------------------------------------------------------------- +;;; � copyright 2003 by Henrik Motakef +;;; � copyright 2004 knowledgeTools Int. GmbH + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions are +;;; met: +;;; +;;; 1. Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. +;;; +;;; 2. Redistributions in binary form must reproduce the above copyright +;;; notice, this list of conditions and the following disclaimer in the +;;; documentation and/or other materials provided with the distribution +;;; +;;; THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +;;; WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +;;; MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +;;; IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +;;; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +;;; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +;;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +;;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +;;; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +;;; IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;;; POSSIBILITY OF SUCH DAMAGE. + +;;; TODO/ Open Questions: + +;; o Should there be a predefined "handler" class, or even several +;; (like Java SAX' ContentHandler, DTDHandler, LexicalHandler etc? I +;; don't really see why. +;; o Missing stuff from Java SAX2: +;; * ignorable-whitespace +;; * document-locator/(setf document-locator) +;; (probably implies a handler class with an appropriate slot) +;; * skipped-entity +;; * The whole ErrorHandler class, this is better handled using +;; conditions (but isn't yet) +;; * The LexicalHandler (start-cdata etc) would be nice [-- partly done] + +(defpackage :sax + (:use :common-lisp) + (:export #:*namespace-processing* + #:*include-xmlns-attributes* + #:*use-xmlns-namespace* + + #:make-attribute + #:find-attribute + #:find-attribute-ns + #:attribute-namespace-uri + #:attribute-local-name + #:attribute-qname + #:attribute-value + #:attribute-specified-p + + #:start-document + #:start-prefix-mapping + #:start-element + #:characters + #:processing-instruction + #:end-element + #:end-prefix-mapping + #:end-document + #:comment + #:start-cdata + #:end-cdata + #:start-dtd + #:end-dtd + #:start-internal-subset + #:end-internal-subset + #:unparsed-entity-declaration + #:external-entity-declaration + #:internal-entity-declaration + #:notation-declaration + #:element-declaration + #:attribute-declaration + #:entity-resolver)) + +(in-package :sax) + +;; The http://xml.org/sax/features/namespaces property +(defvar *namespace-processing* t + "If non-nil (the default), namespace processing is enabled. + +See also `start-element' and `end-element' for a detailed description +of the consequences of modifying this variable, and +`*include-xmlns-attributes*' and `*use-xmlns-namespace*' for further +related options.") + +;; The http://xml.org/sax/features/namespace-prefixes property. +(defvar *include-xmlns-attributes* t + "If non-nil, namespace declarations are reported as normal +attributes. + +This variable has no effect unless `*namespace-processing*' is +non-nil. + +See also `*use-xmlns-namespace*', and `start-element' for a detailed +description of the consequences of setting this variable.") + +(defvar *use-xmlns-namespace* t + "If this variable is nil (the default), attributes with a name like +'xmlns:x' are not considered to be in a namespace, following the +'Namespaces in XML' specification. + +If it is non-nil, such attributes are considered to be in a namespace +with the URI 'http://www.w3.org/2000/xmlns/', following an +incompatible change silently introduced in the errata to that spec, +and adopted by some W3C standards. + +For example, an attribute like xmlns:ex='http://example.com' would be +reported like this: + +*use-xmlns-namespace*: nil +namespace-uri: nil +local-name: nil +qname: #"xmlns:ex" + +*use-xmlns-namespace*: t +namespace-uri: #"http://www.w3.org/2000/xmlns/%5C" +local-name: #"ex" +qname: #"xmlns:ex" + +Setting this variable has no effect unless both +`*namespace-processing*' and `*include-xmlns-attributes*' are non-nil.") + +(defstruct attribute + namespace-uri + local-name + qname + value + specified-p) + +(defun %rod= (x y) + ;; allow rods *and* strings *and* null + (cond + ((zerop (length x)) (zerop (length y))) + ((zerop (length y)) nil) + ((stringp x) (string= x y)) + (t (runes:rod= x y)))) + +(defun find-attribute (qname attrs) + (find qname attrs :key #'attribute-qname :test #'%rod=)) + +(defun find-attribute-ns (uri lname attrs) + (find-if (lambda (attr) + (and (%rod= uri (sax:attribute-namespace-uri attr)) + (%rod= lname (sax:attribute-local-name attr)))) + attrs)) + +(defgeneric start-document (handler) + (:documentation "Called at the beginning of the parsing process, +before any element, processing instruction or comment is reported. + +Handlers that need to maintain internal state may use this to perform +any neccessary initializations.") + (:method ((handler t)) nil)) + +(defgeneric start-element (handler namespace-uri local-name qname attributes) + (:documentation "Called to report the beginning of an element. + +There will always be a corresponding call to end-element, even in the +case of an empty element (i.e. <foo/>). + +If the value of *namespaces* is non-nil, namespace-uri, local-name and +qname are rods. If it is nil, namespace-uri and local-name are always +nil, and it is not an error if the qname is not a well-formed +qualified element name (for example, if it contains more than one +colon). + +The attributes parameter is a list (in arbitrary order) of instances +of the `attribute' structure class. The for their namespace-uri and +local-name properties, the same rules as for the element name +apply. Additionally, namespace-declaring attributes (those whose name +is "xmlns" or starts with "xmlns:") are only included if +*namespace-prefixes* is non-nil.") + (:method ((handler t) namespace-uri local-name qname attributes) + (declare (ignore namespace-uri local-name qname attributes)) + nil)) + +(defgeneric start-prefix-mapping (handler prefix uri) + (:documentation "Called when the scope of a new prefix -> namespace-uri mapping begins. + +This will always be called immediatly before the `start-element' event +for the element on which the namespaces are declared. + +Clients don't usually have to implement this except under special +circumstances, for example when they have to deal with qualified names +in textual content. The parser will handle namespaces of elements and +attributes on its own.") + (:method ((handler t) prefix uri) (declare (ignore prefix uri)) nil)) + +(defgeneric characters (handler data) + (:documentation "Called for textual element content. + +The data is passed as a rod, with all entity references resolved. +It is possible that the character content of an element is reported +via multiple subsequent calls to this generic function.") + (:method ((handler t) data) (declare (ignore data)) nil)) + +(defgeneric processing-instruction (handler target data) + (:documentation "Called when a processing instruction is read. + +Both target and data are rods.") + (:method ((handler t) target data) (declare (ignore target data)) nil)) + +(defgeneric end-prefix-mapping (handler prefix) + (:documentation "Called when a prefix -> namespace-uri mapping goes out of scope. + +This will always be called immediatly after the `end-element' event +for the element on which the namespace is declared. The order of the +end-prefix-mapping events is otherwise not guaranteed. + +Clients don't usually have to implement this except under special +circumstances, for example when they have to deal with qualified names +in textual content. The parser will handle namespaces of elements and +attributes on its own.") + (:method ((handler t) prefix) prefix nil)) + +(defgeneric end-element (handler namespace-uri local-name qname) + (:documentation "Called to report the end of an element. + +See the documentation for `start-element' for a description of the +parameters.") + (:method ((handler t) namespace-uri local-name qname) + (declare (ignore namespace-uri local-name qname)) + nil)) + +(defgeneric end-document (handler) + (:documentation "Called at the end of parsing a document. +This is always the last function called in the parsing process. + +In contrast to all of the other methods, the return value of this gf +is significant, it will be returned by the parse-file/stream/string function.") + (:method ((handler t)) nil)) + +;; LexicalHandler + +(defgeneric comment (handler data) + (:method ((handler t) data) data nil)) + +(defgeneric start-cdata (handler) + (:documentation "Called at the beginning of parsing a CDATA section. + +Handlers only have to implement this if they are interested in the +lexical structure of the parsed document. The content of the CDATA +section is reported via the `characters' generic function like all +other textual content.") + (:method ((handler t)) nil)) + +(defgeneric end-cdata (handler) + (:documentation "Called at the end of parsing a CDATA section. + +Handlers only have to implement this if they are interested in the +lexical structure of the parsed document. The content of the CDATA +section is reported via the `characters' generic function like all +other textual content.") + (:method ((handler t)) nil)) + +(defgeneric start-dtd (handler name public-id system-id) + (:documentation "Called at the beginning of parsing a DTD.") + (:method ((handler t) name public-id system-id) + (declare (ignore name public-id system-id)) + nil)) + +(defgeneric end-dtd (handler) + (:documentation "Called at the end of parsing a DTD.") + (:method ((handler t)) nil)) + +(defgeneric start-internal-subset (handler) + (:documentation "Reports that an internal subset is present. Called before +any definition from the internal subset is reported.") + (:method ((handler t)) nil)) + +(defgeneric end-internal-subset (handler) + (:documentation "Called after processing of the internal subset has +finished, if present.") + (:method ((handler t)) nil)) + +(defgeneric unparsed-entity-declaration + (handler name public-id system-id notation-name) + (:documentation + "Called when an unparsed entity declaration is seen in a DTD.") + (:method ((handler t) name public-id system-id notation-name) + (declare (ignore name public-id system-id notation-name)) + nil)) + +(defgeneric external-entity-declaration + (handler kind name public-id system-id) + (:documentation + "Called when a parsed external entity declaration is seen in a DTD.") + (:method ((handler t) kind name public-id system-id) + (declare (ignore kind name public-id system-id)) + nil)) + +(defgeneric internal-entity-declaration + (handler kind name value) + (:documentation + "Called when an internal entity declaration is seen in a DTD.") + (:method ((handler t) kind name value) + (declare (ignore kind name value)) + nil)) + +(defgeneric notation-declaration + (handler name public-id system-id) + (:documentation + "Called when a notation declaration is seen while parsing a DTD.") + (:method ((handler t) name public-id system-id) + (declare (ignore name public-id system-id)) + nil)) + +(defgeneric element-declaration (handler name model) + (:documentation + "Called when a element declaration is seen in a DTD. Model is not a string, + but a nested list, with *, ?, +, OR, and AND being the operators, rods + as names, :EMPTY and :PCDATA as special tokens. (AND represents + sequences.)") + (:method ((handler t) name model) + (declare (ignore name model)) + nil)) + +(defgeneric attribute-declaration + (handler element-name attribute-name type default) + (:documentation + "Called when an attribute declaration is seen in a DTD. + type one of :CDATA, :ID, :IDREF, :IDREFS, + :ENTITY, :ENTITIES, :NMTOKEN, :NMTOKENS, + (:NOTATION <name>*), or (:ENUMERATION <name>*) + default :REQUIRED, :IMPLIED, (:FIXED content), or (:DEFAULT content)") + (:method ((handler t) element-name attribute-name type value) + (declare (ignore element-name attribute-name type value)) + nil)) + +(defgeneric entity-resolver + (handler resolver) + (:documentation + "Called between sax:end-dtd and sax:end-document to register an entity + resolver, a function of two arguments: An entity name and SAX handler. + When called, the resolver function will parse the named entity's data.") + (:method ((handler t) resolver) + (declare (ignore resolver)) + nil)) + +;; internal for now +(defgeneric dtd (handler dtd) + (:method ((handler t) dtd) (declare (ignore dtd)) nil))
Added: vendor/cxml/sax-proxy.lisp =================================================================== --- vendor/cxml/sax-proxy.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/sax-proxy.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,41 @@ +;;;; sax-proxy.lisp +;;;; +;;;; This file is part of the CXML parser, released under Lisp-LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Copyright (c) 2004 David Lichteblau +;;;; Author: David Lichteblau + +(in-package :cxml) + +(defclass sax-proxy () + ((chained-handler :initform nil + :initarg :chained-handler + :accessor proxy-chained-handler))) + +(macrolet ((define-proxy-method (name (&rest args)) + `(defmethod ,name ((handler sax-proxy) ,@args) + (,name (proxy-chained-handler handler) ,@args)))) + (define-proxy-method sax:start-document ()) + (define-proxy-method sax:start-element (uri lname qname attributes)) + (define-proxy-method sax:start-prefix-mapping (prefix uri)) + (define-proxy-method sax:characters (data)) + (define-proxy-method sax:processing-instruction (target data)) + (define-proxy-method sax:end-prefix-mapping (prefix)) + (define-proxy-method sax:end-element (namespace-uri local-name qname)) + (define-proxy-method sax:end-document ()) + (define-proxy-method sax:comment (data)) + (define-proxy-method sax:start-cdata ()) + (define-proxy-method sax:end-cdata ()) + (define-proxy-method sax:start-dtd (name public-id system-id)) + (define-proxy-method sax:end-dtd ()) + (define-proxy-method sax:start-internal-subset ()) + (define-proxy-method sax:end-internal-subset ()) + (define-proxy-method sax:unparsed-entity-declaration (name pub sys not)) + (define-proxy-method sax:external-entity-declaration (kind name pub sys)) + (define-proxy-method sax:internal-entity-declaration (kind name value)) + (define-proxy-method sax:notation-declaration (name public-id system-id)) + (define-proxy-method sax:element-declaration (name model)) + (define-proxy-method sax:attribute-declaration (elt attr type default)) + (define-proxy-method sax:entity-resolver (resolver)) + (define-proxy-method sax::dtd (dtd)))
Added: vendor/cxml/sax-tests/CVS/Entries =================================================================== --- vendor/cxml/sax-tests/CVS/Entries 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/sax-tests/CVS/Entries 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,4 @@ +/event-collecting-handler.lisp/1.1.1.1/Sun Mar 13 18:02:10 2005// +/package.lisp/1.1.1.1/Sun Mar 13 18:02:10 2005// +/tests.lisp/1.2/Wed Dec 28 23:18:07 2005// +D
Added: vendor/cxml/sax-tests/CVS/Repository =================================================================== --- vendor/cxml/sax-tests/CVS/Repository 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/sax-tests/CVS/Repository 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1 @@ +cxml/xml/sax-tests
Added: vendor/cxml/sax-tests/CVS/Root =================================================================== --- vendor/cxml/sax-tests/CVS/Root 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/sax-tests/CVS/Root 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1 @@ +:pserver:anonymous@common-lisp.net:/project/cxml/cvsroot
Added: vendor/cxml/sax-tests/CVS/Template ===================================================================
Added: vendor/cxml/sax-tests/event-collecting-handler.lisp =================================================================== --- vendor/cxml/sax-tests/event-collecting-handler.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/sax-tests/event-collecting-handler.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,37 @@ +(in-package :sax-tests) + +(defclass event-collecting-handler () + ((event-list :initform '() :accessor event-list))) + +(defmethod start-document ((handler event-collecting-handler)) + (push (list :start-document) (event-list handler))) + +(defmethod start-element ((handler event-collecting-handler) ns-uri local-name qname attrs) + (push (list :start-element ns-uri local-name qname attrs) + (event-list handler))) + +(defmethod start-prefix-mapping ((handler event-collecting-handler) prefix uri) + (push (list :start-prefix-mapping prefix uri) + (event-list handler))) + +(defmethod characters ((handler event-collecting-handler) data) + (push (list :characters data) + (event-list handler))) + +(defmethod processing-instruction ((handler event-collecting-handler) target data) + (push (list :processing-instruction target data) + (event-list handler))) + +(defmethod end-prefix-mapping ((handler event-collecting-handler) prefix) + (push (list :end-prefix-mapping prefix) + (event-list handler))) + +(defmethod end-element ((handler event-collecting-handler) namespace-uri local-name qname) + (push (list :end-element namespace-uri local-name qname) + (event-list handler))) + +(defmethod end-document ((handler event-collecting-handler)) + (push (list :end-document) + (event-list handler)) + + (nreverse (event-list handler))) \ No newline at end of file
Added: vendor/cxml/sax-tests/package.lisp =================================================================== --- vendor/cxml/sax-tests/package.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/sax-tests/package.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,4 @@ +(defpackage :sax-tests + (:use :cl :xml :sax :glisp :rt) + (:export #:event-collecting-handler)) +
Added: vendor/cxml/sax-tests/tests.lisp =================================================================== --- vendor/cxml/sax-tests/tests.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/sax-tests/tests.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,330 @@ +(in-package :sax-tests) + +(defun first-start-element-event (string) + (let ((events (cxml:parse-rod string (make-instance 'event-collecting-handler)))) + (find :start-element events :key #'car))) + + +;;; Attribute handling + +(deftest no-default-namespace-for-attributes + (let* ((evt (first-start-element-event "<x xmlns='http://example.com' a='b'/>")) + (attr (car (fifth evt)))) + (values + (attribute-namespace-uri attr) + (attribute-local-name attr))) + nil nil) + +(deftest attribute-uniqueness-1 + (handler-case + (cxml:parse-rod "<x xmlns:a='http://example.com' xmlns:b='http://example.com' a:a='1' b:a='1'/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest attribute-uniqueness-2 + (handler-case + (cxml:parse-rod "<x xmlns:a='http://example.com' xmlns='http://example.com' a:a='1' a='1'/>") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t)) + t) + +(deftest attribute-uniqueness-3 + (let ((sax:*namespace-processing* nil)) + (handler-case + (cxml:parse-rod "<x xmlns:a='http://example.com' xmlns:b='http://example.com' a:a='1' b:a='1'/>") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t))) + t) + +;;; Namespace undeclaring + +(deftest undeclare-default-namespace-1 + (let* ((evts (cxml:parse-rod "<x xmlns='http://example.com'><y xmlns='' a='1'/></x>" + (make-instance 'event-collecting-handler))) + (start-elt-events (remove :start-element evts :test (complement #'eql) :key #'car)) + (evt1 (first start-elt-events)) + (evt2 (second start-elt-events ))) + (values + (rod= #"http://example.com" (second evt1)) + (second evt2) + (third evt2))) + t nil nil) + +(deftest undeclare-other-namespace + (handler-case + (cxml:parse-rod "<x:x xmlns:x='http://example.com%27%3E<x:y xmlns:x='' a='1'/></x:x>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + + +;;; Require names otherwise totally out of scope of the xmlns rec to be NcNames for no reason + +(deftest pi-names-are-ncnames-when-namespace-processing-1 + (handler-case + (cxml:parse-rod "<?a:b c?><x/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest pi-names-are-ncnames-when-namespace-processing-2 + (let ((sax:*namespace-processing* nil)) + (handler-case + (cxml:parse-rod "<?a:b c?><x/>") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t))) + t) + +(deftest entity-names-are-ncnames-when-namespace-processing-1 + (handler-case + (cxml:parse-rod "<!DOCTYPE x [ <!ENTITY y:z 'foo'> ]><x>&y:z;</x>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest entity-names-are-ncnames-when-namespace-processing-2 + (handler-case + (cxml:parse-rod "<!DOCTYPE x [ <!ENTITY y:z 'foo'> ]><x/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest entity-names-are-ncnames-when-namespace-processing-3 + (let ((sax:*namespace-processing* nil)) + (handler-case + (cxml:parse-rod "<!DOCTYPE x [ <!ENTITY y:z 'foo'> ]><x>&y:z;</x>") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t))) + t) + +(deftest entity-names-are-ncnames-when-namespace-processing-4 + (let ((sax:*namespace-processing* nil)) + (handler-case + (cxml:parse-rod "<!DOCTYPE x [ <!ENTITY y:z 'foo'> ]><x/>") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t))) + t) + +;;; Inclusion of xmlns attributes + +(deftest xmlns-attr-include-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (evt (first-start-element-event "<x xmlns='http://example.com'/>")) + (attrs (fifth evt))) + (length attrs)) + 1) + +(deftest xmlns-attr-discard-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* nil) + (evt (first-start-element-event "<x xmlns='http://example.com'/>")) + (attrs (fifth evt))) + (length attrs)) + 0) + +;;; Namespace of xmlns attributes + +(deftest xmlns-attr-ns-uri-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "<x xmlns='http://example.com'/>")) + (attrs (fifth evt))) + (attribute-namespace-uri (car attrs))) + nil) + +(deftest xmlns-attr-ns-uri-2 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>")) + (attrs (fifth evt))) + (attribute-namespace-uri (car attrs))) + nil) + +(deftest xmlns-attr-ns-uri-3 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "<x xmlns='http://example.com'/>")) + (attrs (fifth evt))) + (attribute-namespace-uri (car attrs))) + nil) + +(deftest xmlns-attr-ns-uri-4 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>")) + (attrs (fifth evt))) + (rod= #"http://www.w3.org/2000/xmlns/" (attribute-namespace-uri (car attrs)))) + t) + +(deftest xmlns-attr-ns-local-name-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "<x xmlns='http://example.com'/>")) + (attrs (fifth evt))) + (attribute-local-name (car attrs))) + nil) + +(deftest xmlns-attr-ns-local-name-2 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>")) + (attrs (fifth evt))) + (attribute-local-name (car attrs))) + nil) + +(deftest xmlns-attr-ns-local-name-3 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "<x xmlns='http://example.com'/>")) + (attrs (fifth evt))) + (attribute-local-name (car attrs))) + nil) + +(deftest xmlns-attr-ns-local-name-4 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>")) + (attrs (fifth evt))) + (rod= #"foo" (attribute-local-name (car attrs)))) + t) + +(deftest xmlns-attr-qname-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "<x xmlns='http://example.com'/>")) + (attrs (fifth evt))) + (rod= #"xmlns" (attribute-qname (car attrs)))) + t) + +(deftest xmlns-attr-qname-2 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>")) + (attrs (fifth evt))) + (rod= #"xmlns:foo" (attribute-qname (car attrs)))) + t) + +(deftest xmlns-attr-qname-4 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "<x xmlns='http://example.com'/>")) + (attrs (fifth evt))) + (rod= #"xmlns" (attribute-qname (car attrs)))) + t) + +(deftest xmlns-attr-qname-4 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>")) + (attrs (fifth evt))) + (rod= #"xmlns:foo" (attribute-qname (car attrs)))) + t) + + +;;; Predefined Namespaces + +(deftest redefine-xml-namespace-1 + (handler-case + (cxml:parse-rod "<x xmlns:xml='http://www.w3.org/XML/1998/namespace'/>") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t)) + t) + +(deftest redefine-xml-namespace-2 + (handler-case + (cxml:parse-rod "<x xmlns:xml='http://example.com/wrong-uri'/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xml-namespace-3 + (handler-case + (cxml:parse-rod "<x xmlns:wrong='http://www.w3.org/XML/1998/namespace'/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xml-namespace-4 + (handler-case + (cxml:parse-rod "<x xmlns:wrong='http://www.w3.org/XML/1998/namespace'/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xmlns-namespace-1 + (handler-case + (cxml:parse-rod "<x xmlns:xmlns='http://www.w3.org/2000/xmlns/'/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xmlns-namespace-2 + (handler-case + (cxml:parse-rod "<x xmlns:xmlns='http://example.com/wrong-ns'/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xmlns-namespace-3 + (handler-case + (cxml:parse-rod "<x xmlns:wrong='http://www.w3.org/2000/xmlns/'/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xmlns-namespace-4 + (handler-case + (cxml:parse-rod "<x xmlns='http://www.w3.org/2000/xmlns/'/>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t)
Added: vendor/cxml/space-normalizer.lisp =================================================================== --- vendor/cxml/space-normalizer.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/space-normalizer.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,62 @@ +;;;; space-normalizer.lisp -- whitespace removal +;;;; +;;;; This file is part of the CXML parser, released under Lisp-LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Copyright (c) 2005 David Lichteblau + +(in-package :cxml) + +(defclass whitespace-normalizer (sax-proxy) + ((attributes :initform '(t) :accessor xml-space-attributes) + (models :initform nil :accessor xml-space-models) + (dtd :initarg :dtd :accessor xml-space-dtd))) + +(defun make-whitespace-normalizer (chained-handler &optional dtd) + (make-instance 'whitespace-normalizer + :dtd dtd + :chained-handler chained-handler)) + +(defmethod sax::dtd ((handler whitespace-normalizer) dtd) + (unless (xml-space-dtd handler) + (setf (xml-space-dtd handler) dtd))) + +(defmethod sax:start-element + ((handler whitespace-normalizer) uri lname qname attrs) + (declare (ignore uri lname)) + (let ((dtd (xml-space-dtd handler))) + (when dtd + (let ((xml-space + (sax:find-attribute (if (stringp qname) "xml:space" #"xml:space") + attrs))) + (push (if xml-space + (rod= (rod (sax:attribute-value xml-space)) #"default") + (car (xml-space-attributes handler))) + (xml-space-attributes handler))) + (let* ((e (cxml::find-element (rod qname) dtd)) + (cspec (when e (cxml::elmdef-content e)))) + (push (and (consp cspec) + (not (and (eq (car cspec) '*) + (let ((subspec (second cspec))) + (and (eq (car subspec) 'or) + (eq (cadr subspec) :PCDATA)))))) + (xml-space-models handler))))) + (call-next-method)) + +(defmethod sax:characters ((handler whitespace-normalizer) data) + (cond + ((and (xml-space-dtd handler) + (car (xml-space-attributes handler)) + (car (xml-space-models handler))) + (unless (every #'white-space-rune-p (rod data)) + (warn "non-whitespace character data in element content") + (call-next-method))) + (t + (call-next-method)))) + +(defmethod sax:end-element ((handler whitespace-normalizer) uri lname qname) + (declare (ignore uri lname qname)) + (when (xml-space-dtd handler) + (pop (xml-space-attributes handler)) + (pop (xml-space-models handler))) + (call-next-method))
Added: vendor/cxml/split-sequence.lisp =================================================================== --- vendor/cxml/split-sequence.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/split-sequence.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,44 @@ +;;; This code was based on Arthur Lemmens' in +;;; URL:http://groups.google.com/groups?as_umsgid=39F36F1A.B8F19D20%40simplex.nl; + +(in-package :cxml) + +(defun split-sequence-if (predicate seq &key (count nil) (remove-empty-subseqs nil) (from-end nil) (start 0) (end nil) (key nil key-supplied)) + (let ((len (length seq)) + (other-keys (when key-supplied + (list :key key)))) + (unless end (setq end len)) + (if from-end + (loop for right = end then left + for left = (max (or (apply #'position-if predicate seq + :end right + :from-end t + other-keys) + -1) + (1- start)) + unless (and (= right (1+ left)) + remove-empty-subseqs) ; empty subseq we don't want + if (and count (>= nr-elts count)) + ;; We can't take any more. Return now. + return (values (nreverse subseqs) right) + else + collect (subseq seq (1+ left) right) into subseqs + and sum 1 into nr-elts + until (< left start) + finally (return (values (nreverse subseqs) (1+ left)))) + (loop for left = start then (+ right 1) + for right = (min (or (apply #'position-if predicate seq + :start left + other-keys) + len) + end) + unless (and (= right left) + remove-empty-subseqs) ; empty subseq we don't want + if (and count (>= nr-elts count)) + ;; We can't take any more. Return now. + return (values subseqs left) + else + collect (subseq seq left right) into subseqs + and sum 1 into nr-elts + until (>= right end) + finally (return (values subseqs right))))))
Added: vendor/cxml/unparse.lisp =================================================================== --- vendor/cxml/unparse.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/unparse.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,569 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CXML; readtable: runes; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Unparse XML +;;; Title: (including support for canonic XML according to J.Clark) +;;; Created: 1999-09-09 +;;; Author: Gilbert Baumann unk6@rz.uni-karlsruhe.de +;;; Author: David Lichteblau david@lichteblau.com +;;; License: Lisp-LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; �� copyright 1999 by Gilbert Baumann +;;; �� copyright 2004 by knowledgeTools Int. GmbH +;;; �� copyright 2004 by David Lichteblau (for headcraft.de) + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :cxml) + +;; +;; | Canonical XML +;; | ============= +;; | +;; | This document defines a subset of XML called canonical XML. The +;; | intended use of canonical XML is in testing XML processors, as a +;; | representation of the result of parsing an XML document. +;; | +;; | Every well-formed XML document has a unique structurally equivalent +;; | canonical XML document. Two structurally equivalent XML documents have +;; | a byte-for-byte identical canonical XML document. Canonicalizing an +;; | XML document requires only information that an XML processor is +;; | required to make available to an application. +;; | +;; | A canonical XML document conforms to the following grammar: +;; | +;; | CanonXML ::= Pi* element Pi* +;; | element ::= Stag (Datachar | Pi | element)* Etag +;; | Stag ::= '<' Name Atts '>' +;; | Etag ::= '</' Name '>' +;; | Pi ::= '<?' Name ' ' (((Char - S) Char*)? - (Char* '?>' Char*)) '?>' +;; | Atts ::= (' ' Name '=' '"' Datachar* '"')* +;; | Datachar ::= '&' | '<' | '>' | '"' +;; | | '	'| ' '| ' ' +;; | | (Char - ('&' | '<' | '>' | '"' | #x9 | #xA | #xD)) +;; | Name ::= (see XML spec) +;; | Char ::= (see XML spec) +;; | S ::= (see XML spec) +;; | +;; | Attributes are in lexicographical order (in Unicode bit order). +;; | +;; | A canonical XML document is encoded in UTF-8. +;; | +;; | Ignorable white space is considered significant and is treated +;; | equivalently to data. +;; +;; -- James Clark (jjc@jclark.com) + + +;;;; SINK: an xml output sink + +(defclass sink () + ((ystream :initarg :ystream :accessor sink-ystream) + (width :initform 79 :initarg :width :accessor width) + (canonical :initform t :initarg :canonical :accessor canonical) + (indentation :initform nil :initarg :indentation :accessor indentation) + (current-indentation :initform 0 :accessor current-indentation) + (notations :initform (make-buffer :element-type t) :accessor notations) + (name-for-dtd :accessor name-for-dtd) + (previous-notation :initform nil :accessor previous-notation) + (have-doctype :initform nil :accessor have-doctype) + (stack :initform nil :accessor stack))) + +(defmethod initialize-instance :after ((instance sink) &key) + (when (eq (canonical instance) t) + (setf (canonical instance) 1)) + (unless (member (canonical instance) '(nil 1 2)) + (error "Invalid canonical form: ~A" (canonical instance))) + (when (and (canonical instance) (indentation instance)) + (error "Cannot indent XML in canonical mode"))) + +(defun make-buffer (&key (element-type '(unsigned-byte 8))) + (make-array 1 + :element-type element-type + :adjustable t + :fill-pointer 0)) + +;; total haesslich, aber die ystreams will ich im moment eigentlich nicht +;; dokumentieren +(macrolet ((define-maker (make-sink make-ystream &rest args) + `(defun ,make-sink (,@args &rest initargs) + (apply #'make-instance + 'sink + :ystream (,make-ystream ,@args) + initargs)))) + (define-maker make-octet-vector-sink make-octet-vector-ystream) + (define-maker make-octet-stream-sink make-octet-stream-ystream stream) + (define-maker make-rod-sink make-rod-ystream) + + #+rune-is-character + (define-maker make-character-stream-sink make-character-stream-ystream stream) + + #-rune-is-character + (define-maker make-string-sink/utf8 make-string-ystream/utf8) + + #-rune-is-character + (define-maker make-character-stream-sink/utf8 + make-character-stream-ystream/utf8 + stream)) + +#+rune-is-character +(defun make-string-sink (&rest args) (apply #'make-rod-sink args)) + + +(defmethod sax:end-document ((sink sink)) + (close-ystream (sink-ystream sink))) + + +;;;; doctype and notations + +(defmethod sax:start-document ((sink sink)) + (unless (canonical sink) + (%write-rod #"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" sink) + (%write-rune #/U+000A sink))) + +(defmethod sax:start-dtd ((sink sink) name public-id system-id) + (setf (name-for-dtd sink) name) + (unless (canonical sink) + (ensure-doctype sink public-id system-id))) + +(defun ensure-doctype (sink &optional public-id system-id) + (unless (have-doctype sink) + (setf (have-doctype sink) t) + (%write-rod #"<!DOCTYPE " sink) + (%write-rod (name-for-dtd sink) sink) + (cond + (public-id + (%write-rod #" PUBLIC \"" sink) + (unparse-string public-id sink) + (%write-rod #"\" \"" sink) + (unparse-string system-id sink) + (%write-rod #"\"" sink)) + (system-id + (%write-rod #" SYSTEM \"" sink) + (unparse-string public-id sink) + (%write-rod #"\"" sink))))) + +(defmethod sax:start-internal-subset ((sink sink)) + (ensure-doctype sink) + (%write-rod #" [" sink) + (%write-rune #/U+000A sink)) + +(defmethod sax:end-internal-subset ((sink sink)) + (ensure-doctype sink) + (%write-rod #"]" sink)) + +(defmethod sax:notation-declaration ((sink sink) name public-id system-id) + (let ((prev (previous-notation sink))) + (when (and (and (canonical sink) (>= (canonical sink) 2)) + prev + (not (rod< prev name))) + (error "misordered notations; cannot unparse canonically")) + (setf (previous-notation sink) name)) + (%write-rod #"<!NOTATION " sink) + (%write-rod name sink) + (cond + ((zerop (length public-id)) + (%write-rod #" SYSTEM '" sink) + (%write-rod system-id sink) + (%write-rune #/' sink)) + ((zerop (length system-id)) + (%write-rod #" PUBLIC '" sink) + (%write-rod public-id sink) + (%write-rune #/' sink)) + (t + (%write-rod #" PUBLIC '" sink) + (%write-rod public-id sink) + (%write-rod #"' '" sink) + (%write-rod system-id sink) + (%write-rune #/' sink))) + (%write-rune #/> sink) + (%write-rune #/U+000A sink)) + +(defmethod sax:unparsed-entity-declaration + ((sink sink) name public-id system-id notation-name) + (unless (and (canonical sink) (< (canonical sink) 3)) + (%write-rod #"<!ENTITY " sink) + (%write-rod name sink) + (cond + ((zerop (length public-id)) + (%write-rod #" SYSTEM '" sink) + (%write-rod system-id sink) + (%write-rune #/' sink)) + ((zerop (length system-id)) + (%write-rod #" PUBLIC '" sink) + (%write-rod public-id sink) + (%write-rune #/' sink)) + (t + (%write-rod #" PUBLIC '" sink) + (%write-rod public-id sink) + (%write-rod #"' '" sink) + (%write-rod system-id sink) + (%write-rune #/' sink))) + (%write-rod #" NDATA " sink) + (%write-rod notation-name sink) + (%write-rune #/> sink) + (%write-rune #/U+000A sink))) + +(defmethod sax:external-entity-declaration + ((sink sink) kind name public-id system-id) + (when (canonical sink) + (error "cannot serialize parsed entities in canonical mode")) + (%write-rod #"<!ENTITY " sink) + (when (eq kind :parameter) + (%write-rod #" % " sink)) + (%write-rod name sink) + (cond + ((zerop (length public-id)) + (%write-rod #" SYSTEM '" sink) + (%write-rod system-id sink) + (%write-rune #/' sink)) + ((zerop (length system-id)) + (%write-rod #" PUBLIC '" sink) + (%write-rod public-id sink) + (%write-rune #/' sink)) + (t + (%write-rod #" PUBLIC '" sink) + (%write-rod public-id sink) + (%write-rod #"' '" sink) + (%write-rod system-id sink) + (%write-rune #/' sink))) + (%write-rune #/> sink) + (%write-rune #/U+000A sink)) + +(defmethod sax:internal-entity-declaration ((sink sink) kind name value) + (when (canonical sink) + (error "cannot serialize parsed entities in canonical mode")) + (%write-rod #"<!ENTITY " sink) + (when (eq kind :parameter) + (%write-rod #" % " sink)) + (%write-rod name sink) + (%write-rune #/U+0020 sink) + (%write-rune #/\" sink) + (unparse-string value sink) + (%write-rune #/\" sink) + (%write-rune #/> sink) + (%write-rune #/U+000A sink)) + +(defmethod sax:element-declaration ((sink sink) name model) + (when (canonical sink) + (error "cannot serialize element type declarations in canonical mode")) + (%write-rod #"<!ELEMENT " sink) + (%write-rod name sink) + (%write-rune #/U+0020 sink) + (labels ((walk (m) + (cond + ((eq m :EMPTY) + (%write-rod "EMPTY" sink)) + ((eq m :PCDATA) + (%write-rod "#PCDATA" sink)) + ((atom m) + (unparse-string m sink)) + (t + (ecase (car m) + (and + (%write-rune #/\( sink) + (loop for (n . rest) on (cdr m) do + (walk n) + (when rest + (%write-rune #\, sink))) + (%write-rune #/\) sink)) + (or + (%write-rune #/\( sink) + (loop for (n . rest) on (cdr m) do + (walk n) + (when rest + (%write-rune #\| sink))) + (%write-rune #/\) sink)) + (* + (walk (second m)) + (%write-rod #/* sink)) + (+ + (walk (second m)) + (%write-rod #/+ sink)) + (? + (walk (second m)) + (%write-rod #/? sink))))))) + (walk model)) + (%write-rune #/> sink) + (%write-rune #/U+000A sink)) + +(defmethod sax:attribute-declaration ((sink sink) ename aname type default) + (when (canonical sink) + (error "cannot serialize attribute type declarations in canonical mode")) + (%write-rod #"<!ATTLIST " sink) + (%write-rod ename sink) + (%write-rune #/U+0020 sink) + (%write-rod aname sink) + (%write-rune #/U+0020 sink) + (cond + ((atom type) + (%write-rod (rod (string-upcase (symbol-name type))) sink)) + (t + (when (eq :NOTATION (car type)) + (%write-rod #"NOTATION " sink)) + (%write-rune #/\( sink) + (loop for (n . rest) on (cdr type) do + (%write-rod n sink) + (when rest + (%write-rune #\| sink))) + (%write-rune #/\) sink))) + (cond + ((atom default) + (%write-rune #/# sink) + (%write-rod (rod (string-upcase (symbol-name default))) sink)) + (t + (when (eq :FIXED (car default)) + (%write-rod #"#FIXED " sink)) + (%write-rune #/\" sink) + (unparse-string (second default) sink) + (%write-rune #/\" sink))) + (%write-rune #/> sink) + (%write-rune #/U+000A sink)) + +(defmethod sax:end-dtd ((sink sink)) + (when (have-doctype sink) + (%write-rod #">" sink) + (%write-rune #/U+000A sink))) + + +;;;; elements + +(defstruct (tag (:constructor make-tag (name))) + name + (n-children 0) + (have-gt nil)) + +(defun sink-fresh-line (sink) + (unless (zerop (ystream-column (sink-ystream sink))) + (%write-rune #/U+000A sink) ;newline + (indent sink))) + +(defun maybe-close-tag (sink) + (let ((tag (car (stack sink)))) + (when (and (tag-p tag) (not (tag-have-gt tag))) + (setf (tag-have-gt tag) t) + (%write-rune #/> sink)))) + +(defmethod sax:start-element + ((sink sink) namespace-uri local-name qname attributes) + (declare (ignore namespace-uri local-name)) + (maybe-close-tag sink) + (when (stack sink) + (incf (tag-n-children (first (stack sink))))) + (push (make-tag qname) (stack sink)) + (when (indentation sink) + (sink-fresh-line sink) + (start-indentation-block sink)) + (%write-rune #/< sink) + (%write-rod qname sink) + (let ((atts (sort (copy-list attributes) #'rod< :key #'sax:attribute-qname))) + (dolist (a atts) + (%write-rune #/space sink) + (%write-rod (sax:attribute-qname a) sink) + (%write-rune #/= sink) + (%write-rune #/" sink) + (unparse-string (sax:attribute-value a) sink) + (%write-rune #/" sink))) + (when (canonical sink) + (maybe-close-tag sink))) + +(defmethod sax:end-element + ((sink sink) namespace-uri local-name qname) + (declare (ignore namespace-uri local-name)) + (let ((tag (pop (stack sink)))) + (unless (tag-p tag) + (error "output does not nest: not in an element")) + (unless (rod= (tag-name tag) qname) + (error "output does not nest: expected ~A but got ~A" + (rod qname) (rod (tag-name tag)))) + (when (indentation sink) + (end-indentation-block sink) + (unless (zerop (tag-n-children tag)) + (sink-fresh-line sink))) + (cond + ((tag-have-gt tag) + (%write-rod '#.(string-rod "</") sink) + (%write-rod qname sink) + (%write-rod '#.(string-rod ">") sink)) + (t + (%write-rod #"/>" sink))))) + +(defmethod sax:processing-instruction ((sink sink) target data) + (maybe-close-tag sink) + (unless (rod-equal target '#.(string-rod "xml")) + (%write-rod '#.(string-rod "<?") sink) + (%write-rod target sink) + (when data + (%write-rune #/space sink) + (%write-rod data sink)) + (%write-rod '#.(string-rod "?>") sink))) + +(defmethod sax:start-cdata ((sink sink)) + (maybe-close-tag sink) + (push :cdata (stack sink))) + +(defmethod sax:characters ((sink sink) data) + (maybe-close-tag sink) + (cond + ((and (eq (car (stack sink)) :cdata) + (not (canonical sink)) + (not (search #"]]" data))) + (when (indentation sink) + (sink-fresh-line sink)) + (%write-rod #"<![CDATA[" sink) + ;; XXX signal error if body is unprintable? + (map nil (lambda (c) (%write-rune c sink)) data) + (%write-rod #"]]>" sink)) + (t + (if (indentation sink) + (unparse-indented-text data sink) + (let ((y (sink-ystream sink))) + (if (canonical sink) + (loop for c across data do (unparse-datachar c y)) + (loop for c across data do (unparse-datachar-readable c y)))))))) + +(defmethod sax:end-cdata ((sink sink)) + (unless (eq (pop (stack sink)) :cdata) + (error "output does not nest: not in a cdata section"))) + +(defun indent (sink) + (dotimes (x (current-indentation sink)) + (%write-rune #/U+0020 sink))) ; space + +(defun start-indentation-block (sink) + (incf (current-indentation sink) (indentation sink))) + +(defun end-indentation-block (sink) + (decf (current-indentation sink) (indentation sink))) + +(defun unparse-indented-text (data sink) + (flet ((whitespacep (x) + (or (rune= x #/U+000A) (rune= x #/U+0020)))) + (let* ((n (length data)) + (pos (position-if-not #'whitespacep data)) + (need-whitespace-p nil)) + (cond + ((zerop n)) + (pos + (sink-fresh-line sink) + (while (< pos n) + (let* ((w (or (position-if #'whitespacep data :start (1+ pos)) n)) + (next (or (position-if-not #'whitespacep data :start w) n))) + (when need-whitespace-p + (if (< (+ (ystream-column (sink-ystream sink)) w (- pos)) + (width sink)) + (%write-rune #/U+0020 sink) + (sink-fresh-line sink))) + (loop + with y = (sink-ystream sink) + for i from pos below w do + (unparse-datachar-readable (elt data i) y)) + (setf need-whitespace-p (< w n)) + (setf pos next)))) + (t + (%write-rune #/U+0020 sink)))))) + +(defun unparse-string (str sink) + (let ((y (sink-ystream sink))) + (loop for rune across str do (unparse-datachar rune y)))) + +(defun unparse-datachar (c ystream) + (cond ((rune= c #/&) (write-rod '#.(string-rod "&") ystream)) + ((rune= c #/<) (write-rod '#.(string-rod "<") ystream)) + ((rune= c #/>) (write-rod '#.(string-rod ">") ystream)) + ((rune= c #/") (write-rod '#.(string-rod """) ystream)) + ((rune= c #/U+0009) (write-rod '#.(string-rod "	") ystream)) + ((rune= c #/U+000A) (write-rod '#.(string-rod " ") ystream)) + ((rune= c #/U+000D) (write-rod '#.(string-rod " ") ystream)) + (t + (write-rune c ystream)))) + +(defun unparse-datachar-readable (c ystream) + (cond ((rune= c #/&) (write-rod '#.(string-rod "&") ystream)) + ((rune= c #/<) (write-rod '#.(string-rod "<") ystream)) + ((rune= c #/>) (write-rod '#.(string-rod ">") ystream)) + ((rune= c #/") (write-rod '#.(string-rod """) ystream)) + (t + (write-rune c ystream)))) + +(defun %write-rune (c sink) + (write-rune c (sink-ystream sink))) + +(defun %write-rod (r sink) + (write-rod r (sink-ystream sink))) + + +;;;; convenience functions for DOMless XML serialization + +(defvar *current-element*) +(defvar *sink*) + +(defmacro with-xml-output (sink &body body) + `(invoke-with-xml-output (lambda () ,@body) ,sink)) + +(defun invoke-with-xml-output (fn sink) + (let ((*sink* sink) + (*current-element* nil)) + (sax:start-document *sink*) + (funcall fn) + (sax:end-document *sink*))) + +(defmacro with-element (qname &body body) + `(invoke-with-element (lambda () ,@body) ,qname)) + +(defun maybe-emit-start-tag () + (when *current-element* + ;; starting child node, need to emit opening tag of parent first: + (destructuring-bind (qname &rest attributes) *current-element* + (sax:start-element *sink* nil nil qname (reverse attributes))) + (setf *current-element* nil))) + +(defun invoke-with-element (fn qname) + (setf qname (rod qname)) + (maybe-emit-start-tag) + (let ((*current-element* (list qname))) + (multiple-value-prog1 + (funcall fn) + (maybe-emit-start-tag) + (sax:end-element *sink* nil nil qname)))) + +(defun attribute (name value) + (push (sax:make-attribute :qname (rod name) :value (rod value)) + (cdr *current-element*)) + value) + +(defun cdata (data) + (sax:start-cdata *sink*) + (sax:characters *sink* (rod data)) + (sax:end-cdata *sink*) + data) + +(defun text (data) + (maybe-emit-start-tag) + (sax:characters *sink* (rod data)) + data) + +(defun rod-to-utf8-string (rod) + (let ((out (make-buffer :element-type 'character))) + (runes-to-utf8/adjustable-string out rod (length rod)) + out)) + +(defun utf8-string-to-rod (str) + (let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str)) + (buffer (make-array (length bytes) :element-type '(unsigned-byte 16))) + (n (decode-sequence :utf-8 bytes 0 (length bytes) buffer 0 0 nil)) + (result (make-array n :element-type 'rune))) + (map-into result #'code-rune buffer) + result))
Added: vendor/cxml/util.lisp =================================================================== --- vendor/cxml/util.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/util.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,73 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Some common utilities for the Closure browser +;;; Created: 1997-12-27 +;;; Author: Gilbert Baumann unk6@rz.uni-karlsruhe.de +;;; License: Lisp-LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1997-1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-24 GB = fixed MULTIPLE-VALUE-OR it now takes any number of +;; subforms +;; + +(in-package :cxml) + +;;; -------------------------------------------------------------------------------- +;;; Meta functions + +(defun curry (fun &rest args) + #'(lambda (&rest more) + (apply fun (append args more)))) + +(defun rcurry (fun &rest args) + #'(lambda (&rest more) + (apply fun (append more args)))) + +(defun compose (f g) + #'(lambda (&rest args) + (funcall f (apply g args)))) + +;;; -------------------------------------------------------------------------------- +;;; while and until + +(defmacro while (test &body body) + `(until (not ,test) ,@body)) + +(defmacro until (test &body body) + `(do () (,test) ,@body)) + +;; prime numbers + +(defun primep (n) + "Returns true, iff `n' is prime." + (and (> n 2) + (do ((i 2 (+ i 1))) + ((> (* i i) n) t) + (cond ((zerop (mod n i)) (return nil)))))) + +(defun nearest-greater-prime (n) + "Returns the smallest prime number no less than `n'." + (cond ((primep n) n) + ((nearest-greater-prime (+ n 1)))))
Added: vendor/cxml/xml-name-rune-p.lisp =================================================================== --- vendor/cxml/xml-name-rune-p.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/xml-name-rune-p.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,227 @@ +;;;; xml-name-rune-p -- character class definitions +;;;; +;;;; This file is part of the CXML parser, released under Lisp-LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Author: Gilbert Baumann unk6@rz.uni-karlsruhe.de + +(in-package :cxml) + +#.(funcall + (compile + nil + '(lambda () + (let ((+max+ #xD800)) + (labels + ((name-start-rune-p (rune) + (or (letter-rune-p rune) + (= #.(char-code #_) rune) + (= #.(char-code #:) rune))) + + (name-rune-p (rune) + (or (letter-rune-p rune) + (digit-rune-p* rune) + (= rune #.(char-code #.)) + (= rune #.(char-code #-)) + (= rune #.(char-code #_)) + (= rune #.(char-code #:)) + (combining-rune-p rune) + (extender-rune-p rune))) + + (letter-rune-p (rune) + (or (base-rune-p rune) + (ideographic-rune-p rune))) + + (digit-rune-p* (rune) + (or (<= 48 rune 57) + (<= 1632 rune 1641) + (<= 1776 rune 1785) + (<= 2406 rune 2415) + (<= 2534 rune 2543) + (<= 2662 rune 2671) + (<= 2790 rune 2799) + (<= 2918 rune 2927) + (<= 3047 rune 3055) + (<= 3174 rune 3183) + (<= 3302 rune 3311) + (<= 3430 rune 3439) + (<= 3664 rune 3673) + (<= 3792 rune 3801) + (<= 3872 rune 3881))) + + + (combining-rune-p (rune) + (or (<= 768 rune 837) + (<= 864 rune 865) + (<= 1155 rune 1158) + (<= 1425 rune 1441) + (<= 1443 rune 1465) + (<= 1467 rune 1469) + (= 1471 rune) + (<= 1473 rune 1474) + (= 1476 rune) + (<= 1611 rune 1618) + (= 1648 rune) + (<= 1750 rune 1756) + (<= 1757 rune 1759) + (<= 1760 rune 1764) + (<= 1767 rune 1768) + (<= 1770 rune 1773) + (<= 2305 rune 2307) + (= 2364 rune) + (<= 2366 rune 2380) + (= 2381 rune) + (<= 2385 rune 2388) + (<= 2402 rune 2403) + (<= 2433 rune 2435) + (= 2492 rune) + (= 2494 rune) + (= 2495 rune) + (<= 2496 rune 2500) + (<= 2503 rune 2504) + (<= 2507 rune 2509) + (= 2519 rune) + (<= 2530 rune 2531) + (= 2562 rune) + (= 2620 rune) + (= 2622 rune) + (= 2623 rune) + (<= 2624 rune 2626) + (<= 2631 rune 2632) + (<= 2635 rune 2637) + (<= 2672 rune 2673) + (<= 2689 rune 2691) + (= 2748 rune) + (<= 2750 rune 2757) + (<= 2759 rune 2761) + (<= 2763 rune 2765) + (<= 2817 rune 2819) + (= 2876 rune) + (<= 2878 rune 2883) + (<= 2887 rune 2888) + (<= 2891 rune 2893) + (<= 2902 rune 2903) + (<= 2946 rune 2947) + (<= 3006 rune 3010) + (<= 3014 rune 3016) + (<= 3018 rune 3021) + (= 3031 rune) + (<= 3073 rune 3075) + (<= 3134 rune 3140) + (<= 3142 rune 3144) + (<= 3146 rune 3149) + (<= 3157 rune 3158) + (<= 3202 rune 3203) + (<= 3262 rune 3268) + (<= 3270 rune 3272) + (<= 3274 rune 3277) + (<= 3285 rune 3286) + (<= 3330 rune 3331) + (<= 3390 rune 3395) + (<= 3398 rune 3400) + (<= 3402 rune 3405) + (= 3415 rune) + (= 3633 rune) + (<= 3636 rune 3642) + (<= 3655 rune 3662) + (= 3761 rune) + (<= 3764 rune 3769) + (<= 3771 rune 3772) + (<= 3784 rune 3789) + (<= 3864 rune 3865) + (= 3893 rune) + (= 3895 rune) + (= 3897 rune) + (= 3902 rune) + (= 3903 rune) + (<= 3953 rune 3972) + (<= 3974 rune 3979) + (<= 3984 rune 3989) + (= 3991 rune) + (<= 3993 rune 4013) + (<= 4017 rune 4023) + (= 4025 rune) + (<= 8400 rune 8412) + (= 8417 rune) + (<= 12330 rune 12335) + (= 12441 rune) + (= 12442 rune))) + + (extender-rune-p (rune) + (or + (= 183 rune) + (= 720 rune) + (= 721 rune) + (= 903 rune) + (= 1600 rune) + (= 3654 rune) + (= 3782 rune) + (= 12293 rune) + (<= 12337 rune 12341) + (<= 12445 rune 12446) + (<= 12540 rune 12542))) + + (base-rune-p (rune) + ;; split into two ORs for LispWorks... + (or + (or (<= 65 rune 90) (<= 97 rune 122) (<= 192 rune 214) (<= 216 rune 246) (<= 248 rune 255) (<= 256 rune 305) + (<= 308 rune 318) (<= 321 rune 328) (<= 330 rune 382) (<= 384 rune 451) (<= 461 rune 496) (<= 500 rune 501) + (<= 506 rune 535) (<= 592 rune 680) (<= 699 rune 705) (= 902 rune) (<= 904 rune 906) (= 908 rune) + (<= 910 rune 929) (<= 931 rune 974) (<= 976 rune 982) (= 986 rune) (= 988 rune) (= 990 rune) (= 992 rune) + (<= 994 rune 1011) (<= 1025 rune 1036) (<= 1038 rune 1103) (<= 1105 rune 1116) (<= 1118 rune 1153) + (<= 1168 rune 1220) (<= 1223 rune 1224) (<= 1227 rune 1228) (<= 1232 rune 1259) (<= 1262 rune 1269) + (<= 1272 rune 1273) (<= 1329 rune 1366) (= 1369 rune) (<= 1377 rune 1414) (<= 1488 rune 1514) + (<= 1520 rune 1522) (<= 1569 rune 1594) (<= 1601 rune 1610) (<= 1649 rune 1719) (<= 1722 rune 1726) + (<= 1728 rune 1742) (<= 1744 rune 1747) (= 1749 rune) (<= 1765 rune 1766) (<= 2309 rune 2361) (= 2365 rune) + (<= 2392 rune 2401) (<= 2437 rune 2444) (<= 2447 rune 2448) (<= 2451 rune 2472) (<= 2474 rune 2480) + (= 2482 rune) (<= 2486 rune 2489) (<= 2524 rune 2525) (<= 2527 rune 2529) (<= 2544 rune 2545) + (<= 2565 rune 2570) (<= 2575 rune 2576) (<= 2579 rune 2600) (<= 2602 rune 2608) (<= 2610 rune 2611) + (<= 2613 rune 2614) (<= 2616 rune 2617) (<= 2649 rune 2652) (= 2654 rune) (<= 2674 rune 2676) + (<= 2693 rune 2699) (= 2701 rune) (<= 2703 rune 2705) (<= 2707 rune 2728) (<= 2730 rune 2736) + (<= 2738 rune 2739) (<= 2741 rune 2745) (= 2749 rune) (= 2784 rune) (<= 2821 rune 2828) (<= 2831 rune 2832) + (<= 2835 rune 2856) (<= 2858 rune 2864) (<= 2866 rune 2867) (<= 2870 rune 2873) (= 2877 rune) + (<= 2908 rune 2909) (<= 2911 rune 2913) (<= 2949 rune 2954) (<= 2958 rune 2960) (<= 2962 rune 2965) + (<= 2969 rune 2970) (= 2972 rune)) + (or (<= 2974 rune 2975) (<= 2979 rune 2980) (<= 2984 rune 2986) + (<= 2990 rune 2997) (<= 2999 rune 3001) (<= 3077 rune 3084) (<= 3086 rune 3088) (<= 3090 rune 3112) + (<= 3114 rune 3123) (<= 3125 rune 3129) (<= 3168 rune 3169) (<= 3205 rune 3212) (<= 3214 rune 3216) + (<= 3218 rune 3240) (<= 3242 rune 3251) (<= 3253 rune 3257) (= 3294 rune) (<= 3296 rune 3297) + (<= 3333 rune 3340) (<= 3342 rune 3344) (<= 3346 rune 3368) (<= 3370 rune 3385) (<= 3424 rune 3425) + (<= 3585 rune 3630) (= 3632 rune) (<= 3634 rune 3635) (<= 3648 rune 3653) (<= 3713 rune 3714) (= 3716 rune) + (<= 3719 rune 3720) (= 3722 rune) (= 3725 rune) (<= 3732 rune 3735) (<= 3737 rune 3743) (<= 3745 rune 3747) + (= 3749 rune) (= 3751 rune) (<= 3754 rune 3755) (<= 3757 rune 3758) (= 3760 rune) (<= 3762 rune 3763) (= 3773 rune) + (<= 3776 rune 3780) (<= 3904 rune 3911) (<= 3913 rune 3945) (<= 4256 rune 4293) (<= 4304 rune 4342) + (= 4352 rune) (<= 4354 rune 4355) (<= 4357 rune 4359) (= 4361 rune) (<= 4363 rune 4364) (<= 4366 rune 4370) + (= 4412 rune) (= 4414 rune) (= 4416 rune) (= 4428 rune) (= 4430 rune) (= 4432 rune) (<= 4436 rune 4437) (= 4441 rune) + (<= 4447 rune 4449) (= 4451 rune) (= 4453 rune) (= 4455 rune) (= 4457 rune) (<= 4461 rune 4462) (<= 4466 rune 4467) + (= 4469 rune) (= 4510 rune) (= 4520 rune) (= 4523 rune) (<= 4526 rune 4527) (<= 4535 rune 4536) (= 4538 rune) + (<= 4540 rune 4546) (= 4587 rune) (= 4592 rune) (= 4601 rune) (<= 7680 rune 7835) (<= 7840 rune 7929) + (<= 7936 rune 7957) (<= 7960 rune 7965) (<= 7968 rune 8005) (<= 8008 rune 8013) (<= 8016 rune 8023) + (= 8025 rune) (= 8027 rune) (= 8029 rune) (<= 8031 rune 8061) (<= 8064 rune 8116) (<= 8118 rune 8124) (= 8126 rune) + (<= 8130 rune 8132) (<= 8134 rune 8140) (<= 8144 rune 8147) (<= 8150 rune 8155) (<= 8160 rune 8172) + (<= 8178 rune 8180) (<= 8182 rune 8188) (= 8486 rune) (<= 8490 rune 8491) (= 8494 rune) (<= 8576 rune 8578) + (<= 12353 rune 12436) (<= 12449 rune 12538) (<= 12549 rune 12588) (<= 44032 rune 55203)))) + + (ideographic-rune-p (rune) + (or (<= 19968 rune 40869) (= 12295 rune) (<= 12321 rune 12329))) + + + (predicate-to-bv (p) + (let ((r (make-array +max+ :element-type 'bit :initial-element 0))) + (dotimes (i #x10000 r) + (when (funcall p i) + (setf (aref r i) 1))))) ) + + `(progn + (DEFINLINE NAME-RUNE-P (RUNE) + (SETF RUNE (RUNE-CODE RUNE)) + (AND (<= 0 RUNE ,+max+) + (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3))) + (= 1 (SBIT ',(predicate-to-bv #'name-rune-p) + (THE FIXNUM RUNE)))))) + (DEFINLINE NAME-START-RUNE-P (RUNE) + (SETF RUNE (RUNE-CODE RUNE)) + (AND (<= 0 RUNE ,+MAX+) + (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3))) + (= 1 (SBIT ',(predicate-to-bv #'name-start-rune-p) + (THE FIXNUM RUNE)))))))) ))))
Added: vendor/cxml/xml-parse.lisp =================================================================== --- vendor/cxml/xml-parse.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/xml-parse.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,3544 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CXML; readtable: runes; -*- +;;; --------------------------------------------------------------------------- +;;; Title: XML parser +;;; Created: 1999-07-17 +;;; Author: Gilbert Baumann unk6@rz.uni-karlsruhe.de +;;; Author: Henrik Motakef hmot@henrik-motakef.de +;;; Author: David Lichteblau david@lichteblau.com +;;; License: Lisp-LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; © copyright 1999 by Gilbert Baumann +;;; © copyright 2003 by Henrik Motakef +;;; © copyright 2004 knowledgeTools Int. GmbH +;;; © copyright 2004 David Lichteblau +;;; © copyright 2005 David Lichteblau + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +;;; Streams + +;;; xstreams + +;; For reading runes, I defined my own streams, called xstreams, +;; because we want to be fast. A function call or even a method call +;; per character is not acceptable, instead of that we define a +;; buffered stream with and advertised buffer layout, so that we +;; could use the trick stdio uses: READ-RUNE and PEEK-RUNE are macros, +;; directly accessing the buffer and only calling some underflow +;; handler in case of stream underflows. This will yield to quite a +;; performance boost vs calling READ-BYTE per character. + +;; Also we need to do encoding t conversion on ; this better done at large chunks of data rather than on a character +;; by character basis. This way we need a dispatch on the active +;; encoding only once in a while, instead of for each character. This +;; allows us to use a CLOS interface to do the underflow handling. + +;;; zstreams + +;; Now, for reading tokens, we define another kind of streams, called +;; zstreams. These zstreams also maintain an input stack to implement +;; inclusion of external entities. This input stack contains xstreams +;; or the special marker :STOP. Such a :STOP marker indicates, that +;; input should not continue there, but well stop; that is simulate an +;; EOF. The user is then responsible to pop this marker off the input +;; stack. +;; +;; This input stack is also used to detect circular entity inclusion. + +;; The zstream tokenizer recognizes the following types of tokens and +;; is controlled by the *DATA-BEHAVIOUR* flag. (Which should become a +;; slot of zstreams instead). + +;; Common +;; :xml-decl (<target> . <content>) ;processing-instruction starting with "<?xml" +;; :pi (<target> . <content>) ;processing-instruction +;; :stag (<name> . <atts>) ;start tag +;; :etag (<name> . <atts>) ;end tag +;; :ztag (<name> . <atts>) ;empty tag +;; :<!element +;; :<!entity +;; :<!attlist +;; :<!notation +;; :<!doctype +;; :<![ +;; :comment <content> + +;; *data-behaviour* = :DTD +;; +;; :nmtoken <interned-rod> +;; :#required +;; :#implied +;; :#fixed +;; :#pcdata +;; :s +;; :\[ :\] :\( :\) :|\ :\> :\" :\' :\, :\? :\* :\+ + +;; *data-behaviour* = :DOC +;; +;; :entity-ref <interned-rod> +;; :cdata <rod> + + +;;; TODO +;; +;; o provide for a faster DOM +;; +;; o morph zstream into a context object and thus also get rid of +;; special variables. Put the current DTD there too. +;; [partly done] + +;; o the *scratch-pad* hack should become something much more +;; reentrant, we could either define a system-wide resource +;; or allocate some scratch-pads per context. +;; [for thread-safety reasons the array are allocated per context now, +;; reentrancy is still open] + +;; o CR handling in utf-16 deocders +;; +;; o UCS-4 reader +;; +;; o max depth together with circle detection +;; (or proof, that our circle detection is enough). +;; [gemeint ist zstream-push--david] +;; +;; o better extensibility wrt character representation, one may want to +;; have +;; - UCS-4 in vectoren +;; +;; o xstreams auslagern, documententieren und dann auch in SGML und +;; CSS parser verwenden. (halt alles was zeichen liest). +;; [ausgelagert sind sie; dokumentiert "so la la"; die Reintegration +;; in Closure ist ein ganz anderes Thema] +;; +;; o recording of source locations for nodes. +;; +;; o based on the DTD and xml:space attribute implement HTML white +;; space rules. +;; +;; o on a parser option, do not expand external entities. + +;;;; Validity constraints: +;;;; (00) Root Element Type like (03), c.f. MAKE-ROOT-MODEL +;;;; (01) Proper Declaration/PE Nesting P/MARKUP-DECL +;;;; (02) Standalone Document Declaration all over the place [*] +;;;; (03) Element Valid VALIDATE-*-ELEMENT, -CHARACTERS +;;;; (04) Attribute Value Type VALIDATE-ATTRIBUTE +;;;; (05) Unique Element Type Declaration DEFINE-ELEMENT +;;;; (06) Proper Group/PE Nesting P/CSPEC +;;;; (07) No Duplicate Types LEGAL-CONTENT-MODEL-P +;;;; (08) ID VALIDATE-ATTRIBUTE +;;;; (09) One ID per Element Type DEFINE-ATTRIBUTE +;;;; (10) ID Attribute Default DEFINE-ATTRIBUTE +;;;; (11) IDREF VALIDATE-ATTRIBUTE, P/DOCUMENT +;;;; (12) Entity Name VALIDATE-ATTRIBUTE +;;;; (13) Name Token VALIDATE-ATTRIBUTE +;;;; (14) Notation Attributes VALIDATE-ATTRIBUTE, P/ATT-TYPE +;;;; (15) One Notation Per Element Type DEFINE-ATTRIBUTE +;;;; (16) No Notation on Empty Element DEFINE-ELEMENT, -ATTRIBUTE +;;;; (17) Enumeration VALIDATE-ATTRIBUTE +;;;; (18) Required Attribute PROCESS-ATTRIBUTES +;;;; (19) Attribute Default Legal DEFINE-ATTRIBUTE +;;;; (20) Fixed Attribute Default VALIDATE-ATTRIBUTE +;;;; (21) Proper Conditional Section/PE Nesting P/CONDITIONAL-SECT, ... +;;;; (22) Entity Declared [**] +;;;; (23) Notation Declared P/ENTITY-DEF, P/DOCUMENT +;;;; (24) Unique Notation Name DEFINE-NOTATION +;;;; +;;;; [*] Perhaps we could revert the explicit checks of (02), if we did +;;;; _not_ read external subsets of standalone documents when parsing in +;;;; validating mode. Violations of VC (02) constraints would then appear as +;;;; wellformedness violations, right? +;;;; +;;;; [**] Although I haven't investigated this properly yet, I believe that +;;;; we check this VC together with the WFC even in non-validating mode. + +(in-package :cxml) + +#+allegro +(setf (excl:named-readtable :runes) *readtable*) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defparameter *fast* '(optimize (speed 3) (safety 0))) + ;;(defparameter *fast* '(optimize (speed 2) (safety 3))) + ) + +;;; parser context + +(defvar *ctx* nil) + +(defstruct (context (:conc-name nil)) + handler + (dtd nil) + model-stack + (referenced-notations '()) + (id-table (%make-rod-hash-table)) + ;; FIXME: Wofuer ist name-hashtable da? Will man das wissen? + (name-hashtable (make-rod-hashtable :size 2000)) + (standalone-p nil) + (entity-resolver nil) + (disallow-internal-subset nil) + main-zstream) + +(defvar *expand-pe-p* nil) + +(defparameter *namespace-bindings* + '((#"" . nil) + (#"xmlns" . #"http://www.w3.org/2000/xmlns/") + (#"xml" . #"http://www.w3.org/XML/1998/namespace"))) + +;;;; --------------------------------------------------------------------------- +;;;; xstreams +;;;; + + +(defstruct (stream-name + (:print-function print-stream-name)) + entity-name + entity-kind + uri) + +(defun print-stream-name (object stream depth) + (declare (ignore depth)) + (format stream "[~A ~S ~A]" + (rod-string (stream-name-entity-name object)) + (stream-name-entity-kind object) + (stream-name-uri object))) + +(deftype read-element () 'rune) + +(defun call-with-open-xstream (fn stream) + (unwind-protect + (funcall fn stream) + (close-xstream stream))) + +(defmacro with-open-xstream ((var value) &body body) + `(call-with-open-xstream (lambda (,var) ,@body) ,value)) + +(defun call-with-open-xfile (continuation &rest open-args) + (let ((input (apply #'open (car open-args) :element-type '(unsigned-byte 8) (cdr open-args)))) + (unwind-protect + (progn + (funcall continuation (make-xstream input))) + (close input)))) + +(defmacro with-open-xfile ((stream &rest open-args) &body body) + `(call-with-open-xfile (lambda (,stream) .,body) .,open-args)) + +;;;; ------------------------------------------------------------------- +;;;; Rechnen mit Runen +;;;; + +;; Let us first define fast fixnum arithmetric get rid of type +;; checks. (After all we know what we do here). + +(defmacro fx-op (op &rest xs) + `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))) +(defmacro fx-pred (op &rest xs) + `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))) + +(defmacro %+ (&rest xs) `(fx-op + ,@xs)) +(defmacro %- (&rest xs) `(fx-op - ,@xs)) +(defmacro %* (&rest xs) `(fx-op * ,@xs)) +(defmacro %/ (&rest xs) `(fx-op floor ,@xs)) +(defmacro %and (&rest xs) `(fx-op logand ,@xs)) +(defmacro %ior (&rest xs) `(fx-op logior ,@xs)) +(defmacro %xor (&rest xs) `(fx-op logxor ,@xs)) +(defmacro %ash (&rest xs) `(fx-op ash ,@xs)) +(defmacro %mod (&rest xs) `(fx-op mod ,@xs)) + +(defmacro %= (&rest xs) `(fx-pred = ,@xs)) +(defmacro %<= (&rest xs) `(fx-pred <= ,@xs)) +(defmacro %>= (&rest xs) `(fx-pred >= ,@xs)) +(defmacro %< (&rest xs) `(fx-pred < ,@xs)) +(defmacro %> (&rest xs) `(fx-pred > ,@xs)) + +;;; XXX Geschwindigkeit dieser Definitionen untersuchen! + +(defmacro rune-op (op &rest xs) + `(code-rune (,op ,@(mapcar (lambda (x) `(rune-code ,x)) xs)))) +(defmacro rune-pred (op &rest xs) + `(,op ,@(mapcar (lambda (x) `(rune-code ,x)) xs))) + +(defmacro %rune+ (&rest xs) `(rune-op + ,@xs)) +(defmacro %rune- (&rest xs) `(rune-op - ,@xs)) +(defmacro %rune* (&rest xs) `(rune-op * ,@xs)) +(defmacro %rune/ (&rest xs) `(rune-op floor ,@xs)) +(defmacro %rune-and (&rest xs) `(rune-op logand ,@xs)) +(defmacro %rune-ior (&rest xs) `(rune-op logior ,@xs)) +(defmacro %rune-xor (&rest xs) `(rune-op logxor ,@xs)) +(defmacro %rune-ash (a b) `(code-rune (ash (rune-code ,a) ,b))) +(defmacro %rune-mod (&rest xs) `(rune-op mod ,@xs)) + +(defmacro %rune= (&rest xs) `(rune-pred = ,@xs)) +(defmacro %rune<= (&rest xs) `(rune-pred <= ,@xs)) +(defmacro %rune>= (&rest xs) `(rune-pred >= ,@xs)) +(defmacro %rune< (&rest xs) `(rune-pred < ,@xs)) +(defmacro %rune> (&rest xs) `(rune-pred > ,@xs)) + +;;;; --------------------------------------------------------------------------- +;;;; rod hashtable +;;;; + +;;; make-rod-hashtable +;;; rod-hash-get hashtable rod &optional start end -> value ; successp +;;; (setf (rod-hash-get hashtable rod &optional start end) new-value +;;; + +(defstruct (rod-hashtable (:constructor make-rod-hashtable/low)) + size ;size of table + table ; + ) + +(defun make-rod-hashtable (&key (size 200)) + (setf size (nearest-greater-prime size)) + (make-rod-hashtable/low + :size size + :table (make-array size :initial-element nil))) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defconstant +fixnum-bits+ + (1- (integer-length most-positive-fixnum)) + "Pessimistic approximation of the number of bits of fixnums.") + + (defconstant +fixnum-mask+ + (1- (expt 2 +fixnum-bits+)) + "Pessimistic approximation of the largest bit-mask, still being a fixnum.")) + +(definline stir (a b) + (%and +fixnum-mask+ + (%xor (%ior (%ash (%and a #.(ash +fixnum-mask+ -5)) 5) + (%ash a #.(- 5 +fixnum-bits+))) + b))) + +(definline rod-hash (rod start end) + "Compute a hash code out of a rod." + (let ((res (%- end start))) + (do ((i start (%+ i 1))) + ((%= i end)) + (declare (type fixnum i)) + (setf res (stir res (rune-code (%rune rod i))))) + res)) + +(definline rod=* (x y &key (start1 0) (end1 (length x)) + (start2 0) (end2 (length y))) + (and (%= (%- end1 start1) (%- end2 start2)) + (do ((i start1 (%+ i 1)) + (j start2 (%+ j 1))) + ((%= i end1) + t) + (unless (rune= (%rune x i) (%rune y j)) + (return nil))))) + +(definline rod=** (x y start1 end1 start2 end2) + (and (%= (%- end1 start1) (%- end2 start2)) + (do ((i start1 (%+ i 1)) + (j start2 (%+ j 1))) + ((%= i end1) + t) + (unless (rune= (%rune x i) (%rune y j)) + (return nil))))) + +(defun rod-hash-get (hashtable rod &optional (start 0) (end (length rod))) + (declare (type (simple-array rune (*)) rod)) + (let ((j (%mod (rod-hash rod start end) + (rod-hashtable-size hashtable)))) + (dolist (q (svref (rod-hashtable-table hashtable) j) + (values nil nil nil)) + (declare (type cons q)) + (when (rod=** (car q) rod 0 (length (the (simple-array rune (*)) (car q))) start end) + (return (values (cdr q) t (car q))))))) + +(defun rod-hash-set (new-value hashtable rod &optional (start 0) (end (length rod))) + (let ((j (%mod (rod-hash rod start end) + (rod-hashtable-size hashtable))) + (key nil)) + (dolist (q (svref (rod-hashtable-table hashtable) j) + (progn + (setf key (rod-subseq* rod start end)) + (push (cons key new-value) + (aref (rod-hashtable-table hashtable) j)))) + (when (rod=* (car q) rod :start2 start :end2 end) + (setf key (car q)) + (setf (cdr q) new-value) + (return))) + (values new-value key))) + +#-rune-is-character +(defun rod-subseq* (source start &optional (end (length source))) + (unless (and (typep start 'fixnum) (>= start 0)) + (error "~S is not a non-negative fixnum." start)) + (unless (and (typep end 'fixnum) (>= end start)) + (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) + (when (> start (length source)) + (error "START argument, ~S, should be no greater than length of rod." start)) + (when (> end (length source)) + (error "END argument, ~S, should be no greater than length of rod." end)) + (locally + (declare (type fixnum start end)) + (let ((res (make-rod (- end start)))) + (declare (type rod res)) + (do ((i (- (- end start) 1) (the fixnum (- i 1)))) + ((< i 0) res) + (declare (type fixnum i)) + (setf (%rune res i) (aref source (the fixnum (+ i start)))))))) + +#+rune-is-character +(defun rod-subseq* (source start &optional (end (length source))) + (subseq source start end)) + +(deftype ufixnum () `(unsigned-byte ,(integer-length most-positive-fixnum))) + +#-rune-is-character +(defun rod-subseq** (source start &optional (end (length source))) + (declare (type (simple-array rune (*)) source) + (type ufixnum start) + (type ufixnum end) + (optimize (speed 3) (safety 0))) + (let ((res (make-array (%- end start) :element-type 'rune))) + (declare (type (simple-array rune (*)) res)) + (let ((i (%- end start))) + (declare (type ufixnum i)) + (loop + (setf i (- i 1)) + (when (= i 0) + (return)) + (setf (%rune res i) (%rune source (the ufixnum (+ i start)))))) + res)) + +#+rune-is-character +(defun rod-subseq** (source start &optional (end (length source))) + (subseq source start end)) + +(defun (setf rod-hash-get) (new-value hashtable rod &optional (start 0) (end (length rod))) + (rod-hash-set new-value hashtable rod start end)) + +(defun intern-name (rod &optional (start 0) (end (length rod))) + (multiple-value-bind (value successp key) (rod-hash-get (name-hashtable *ctx*) rod start end) + (declare (ignore value)) + (if successp + key + (nth-value 1 (rod-hash-set t (name-hashtable *ctx*) rod start end))))) + +;;;; --------------------------------------------------------------------------- +;;;; +;;;; rod collector +;;;; + +(defvar *scratch-pad*) +(defvar *scratch-pad-2*) +(defvar *scratch-pad-3*) +(defvar *scratch-pad-4*) + +(declaim (type (simple-array rune (*)) + *scratch-pad* *scratch-pad-2* *scratch-pad-3* *scratch-pad-4*)) + +(defmacro with-scratch-pads ((&optional) &body body) + `(let ((*scratch-pad* (make-array 1024 :element-type 'rune)) + (*scratch-pad-2* (make-array 1024 :element-type 'rune)) + (*scratch-pad-3* (make-array 1024 :element-type 'rune)) + (*scratch-pad-4* (make-array 1024 :element-type 'rune))) + ,@body)) + +(defmacro %put-unicode-char (code-var put) + `(progn + (cond ((%> ,code-var #xFFFF) + (,put (the rune (code-rune (%+ #xD7C0 (%ash ,code-var -10))))) + (,put (the rune (code-rune (%ior #xDC00 (%and ,code-var #x03FF)))))) + (t + (,put (code-rune ,code-var)))))) + +(defun adjust-array-by-copying (old-array new-size) + "Adjust an array by copying and thus ensures, that result is a SIMPLE-ARRAY." + (let ((res (make-array new-size :element-type (array-element-type old-array)))) + (replace res old-array + :start1 0 :end1 (length old-array) + :start2 0 :end2 (length old-array)) + res)) + +(defmacro with-rune-collector-aux (scratch collect body mode) + (let ((rod (gensym)) + (n (gensym)) + (i (gensym)) + (b (gensym))) + `(let ((,n (length ,scratch)) + (,i 0) + (,b ,scratch)) + (declare (type fixnum ,n ,i)) + (macrolet + ((,collect (x) + `((lambda (x) + (locally + (declare #.*fast*) + (when (%>= ,',i ,',n) + (setf ,',n (* 2 ,',n)) + (setf ,',b + (setf ,',scratch + (adjust-array-by-copying ,',scratch ,',n)))) + (setf (aref (the (simple-array rune (*)) ,',b) ,',i) x) + (incf ,',i))) + ,x))) + ,@body + ,(ecase mode + (:intern + `(intern-name ,b 0 ,i)) + (:copy + `(let ((,rod (make-rod ,i))) + (while (not (%= ,i 0)) + (setf ,i (%- ,i 1)) + (setf (%rune ,rod ,i) + (aref (the (simple-array rune (*)) ,b) ,i))) + ,rod)) + (:raw + `(values ,b 0 ,i)) + ))))) + +'(defmacro with-rune-collector-aux (scratch collect body mode) + (let ((rod (gensym)) + (n (gensym)) + (i (gensym)) + (b (gensym))) + `(let ((,n (length ,scratch)) + (,i 0)) + (declare (type fixnum ,n ,i)) + (macrolet + ((,collect (x) + `((lambda (x) + (locally + (declare #.*fast*) + (when (%>= ,',i ,',n) + (setf ,',n (* 2 ,',n)) + (setf ,',scratch + (setf ,',scratch + (adjust-array-by-copying ,',scratch ,',n)))) + (setf (aref (the (simple-array rune (*)) ,',scratch) ,',i) x) + (incf ,',i))) + ,x))) + ,@body + ,(ecase mode + (:intern + `(intern-name ,scratch 0 ,i)) + (:copy + `(let ((,rod (make-rod ,i))) + (while (%> ,i 0) + (setf ,i (%- ,i 1)) + (setf (%rune ,rod ,i) + (aref (the (simple-array rune (*)) ,scratch) ,i))) + ,rod)) + (:raw + `(values ,scratch 0 ,i)) + ))))) + +(defmacro with-rune-collector ((collect) &body body) + `(with-rune-collector-aux *scratch-pad* ,collect ,body :copy)) + +(defmacro with-rune-collector-2 ((collect) &body body) + `(with-rune-collector-aux *scratch-pad-2* ,collect ,body :copy)) + +(defmacro with-rune-collector-3 ((collect) &body body) + `(with-rune-collector-aux *scratch-pad-3* ,collect ,body :copy)) + +(defmacro with-rune-collector-4 ((collect) &body body) + `(with-rune-collector-aux *scratch-pad-4* ,collect ,body :copy)) + +(defmacro with-rune-collector/intern ((collect) &body body) + `(with-rune-collector-aux *scratch-pad* ,collect ,body :intern)) + +(defmacro with-rune-collector/raw ((collect) &body body) + `(with-rune-collector-aux *scratch-pad* ,collect ,body :raw)) + +#| +(defmacro while-reading-runes ((reader stream-in) &rest body) + ;; Thou shalt not leave body via a non local exit + (let ((stream (make-symbol "STREAM")) + (rptr (make-symbol "RPTR")) + (fptr (make-symbol "FPTR")) + (buf (make-symbol "BUF")) ) + `(let* ((,stream ,stream-in) + (,rptr (xstream-read-ptr ,stream)) + (,fptr (xstream-fill-ptr ,stream)) + (,buf (xstream-buffer ,stream))) + (declare (type fixnum ,rptr ,fptr) + (type xstream ,stream)) + (macrolet ((,reader (res-var) + `(cond ((%= ,',rptr ,',fptr) + (setf (xstream-read-ptr ,',stream) ,',rptr) + (setf ,res-var (xstream-underflow ,',stream)) + (setf ,',rptr (xstream-read-ptr ,',stream)) + (setf ,',fptr (xstream-fill-ptr ,',stream)) + (setf ,',buf (xstream-buffer ,',stream))) + (t + (setf ,res-var + (aref (the (simple-array read-element (*)) ,',buf) + (the fixnum ,',rptr))) + (setf ,',rptr (%+ ,',rptr 1)))))) + (prog1 + (let () .,body) + (setf (xstream-read-ptr ,stream) ,rptr) ))))) +|# + +;;;; --------------------------------------------------------------------------- +;;;; DTD +;;;; + +(define-condition xml-parse-error (simple-error) ()) +(define-condition well-formedness-violation (xml-parse-error) ()) +(define-condition validity-error (xml-parse-error) ()) + +;; We make some effort to signal end of file as a special condition, but we +;; don't actually try very hard. Not sure whether we should. Right now I +;; would prefer not to document this class. +(define-condition end-of-xstream (well-formedness-violation) ()) + +(defun describe-xstream (x s) + (format s " Line ~D, column ~D in ~A~%" + (xstream-line-number x) + (xstream-column-number x) + (let ((name (xstream-name x))) + (cond + ((null name) + "<anonymous stream>") + ((eq :main (stream-name-entity-kind name)) + (stream-name-uri name)) + (t + name))))) + +(defun %error (class stream message) + (let* ((zmain (if *ctx* (main-zstream *ctx*) nil)) + (zstream (if (zstream-p stream) stream zmain)) + (xstream (if (xstream-p stream) stream nil)) + (s (make-string-output-stream))) + (write-line message s) + (when xstream + (write-line "Location:" s) + (describe-xstream xstream s)) + (when zstream + (let ((stack + (remove xstream (remove :stop (zstream-input-stack zstream))))) + (when stack + (write-line "Context:" s) + (dolist (x stack) + (describe-xstream x s))))) + (when (and zmain (not (eq zstream zmain))) + (let ((stack + (remove xstream (remove :stop (zstream-input-stack zmain))))) + (when stack + (write-line "Context in main document:" s) + (dolist (x stack) + (describe-xstream x s))))) + (error class + :format-control "~A" + :format-arguments (list (get-output-stream-string s))))) + +(defun validity-error (fmt &rest args) + (%error 'validity-error + nil + (format nil "Document not valid: ~?" fmt args))) + +(defun wf-error (stream fmt &rest args) + (%error 'well-formedness-violation + stream + (format nil "Document not well-formed: ~?" fmt args))) + +(defun eox (stream &optional x &rest args) + (%error 'end-of-xstream + stream + (format nil "End of file~@[: ~?~]" x args))) + +(defvar *validate* t) +(defvar *external-subset-p* nil) + +(defun validate-start-element (ctx name) + (when *validate* + (let* ((pair (car (model-stack ctx))) + (newval (funcall (car pair) name))) + (unless newval + (validity-error "(03) Element Valid: ~A" (rod-string name))) + (setf (car pair) newval) + (let ((e (find-element name (dtd ctx)))) + (unless e + (validity-error "(03) Element Valid: no definition for ~A" + (rod-string name))) + (maybe-compile-cspec e) + (push (copy-cons (elmdef-compiled-cspec e)) (model-stack ctx)))))) + +(defun copy-cons (x) + (cons (car x) (cdr x))) + +(defun validate-end-element (ctx name) + (when *validate* + (let ((pair (car (model-stack ctx)))) + (unless (eq (funcall (car pair) nil) t) + (validity-error "(03) Element Valid: ~A" (rod-string name))) + (pop (model-stack ctx))))) + +(defun validate-characters (ctx rod) + (when *validate* + (let ((pair (car (model-stack ctx)))) + (unless (funcall (cdr pair) rod) + (validity-error "(03) Element Valid: unexpected PCDATA"))))) + +(defun standalone-check-necessary-p (def) + (and *validate* + (standalone-p *ctx*) + (etypecase def + (elmdef (elmdef-external-p def)) + (attdef (attdef-external-p def))))) + +;; attribute validation, defaulting, and normalization -- except for for +;; uniqueness checks, which are done after namespaces have been declared +(defun process-attributes (ctx name attlist) + (let ((e (find-element name (dtd ctx)))) + (cond + (e + (dolist (ad (elmdef-attributes e)) ;handle default values + (unless (get-attribute (attdef-name ad) attlist) + (case (attdef-default ad) + (:IMPLIED) + (:REQUIRED + (when *validate* + (validity-error "(18) Required Attribute: ~S not specified" + (rod-string (attdef-name ad))))) + (t + (when (standalone-check-necessary-p ad) + (validity-error "(02) Standalone Document Declaration: missing attribute value")) + (push (sax:make-attribute :qname (attdef-name ad) + :value (cadr (attdef-default ad)) + :specified-p nil) + attlist))))) + (dolist (a attlist) ;normalize non-CDATA values + (let* ((qname (sax:attribute-qname a)) + (adef (find-attribute e qname))) + (when adef + (when (and *validate* + sax:*namespace-processing* + (eq (attdef-type adef) :ID) + (find #/: (sax:attribute-value a))) + (validity-error "colon in ID attribute")) + (unless (eq (attdef-type adef) :CDATA) + (let ((canon (canon-not-cdata-attval (sax:attribute-value a)))) + (when (and (standalone-check-necessary-p adef) + (not (rod= (sax:attribute-value a) canon))) + (validity-error "(02) Standalone Document Declaration: attribute value not normalized")) + (setf (sax:attribute-value a) canon)))))) + (when *validate* ;maybe validate attribute values + (dolist (a attlist) + (validate-attribute ctx e a)))) + ((and *validate* attlist) + (validity-error "(04) Attribute Value Type: no definition for element ~A" + (rod-string name))))) + attlist) + +(defun get-attribute (name attributes) + (member name attributes :key #'sax:attribute-qname :test #'rod=)) + +(defun validate-attribute (ctx e a) + (when (sax:attribute-specified-p a) ;defaults checked by DEFINE-ATTRIBUTE + (let* ((qname (sax:attribute-qname a)) + (adef + (or (find-attribute e qname) + (validity-error "(04) Attribute Value Type: not declared: ~A" + (rod-string qname))))) + (validate-attribute* ctx adef (sax:attribute-value a))))) + +(defun validate-attribute* (ctx adef value) + (let ((type (attdef-type adef)) + (default (attdef-default adef))) + (when (and (listp default) + (eq (car default) :FIXED) + (not (rod= value (cadr default)))) + (validity-error "(20) Fixed Attribute Default: expected ~S but got ~S" + (rod-string (cadr default)) + (rod-string value))) + (ecase (if (listp type) (car type) type) + (:ID + (unless (valid-name-p value) + (validity-error "(08) ID: not a name: ~S" (rod-string value))) + (when (eq (gethash value (id-table ctx)) t) + (validity-error "(08) ID: ~S not unique" (rod-string value))) + (setf (gethash value (id-table ctx)) t)) + (:IDREF + (validate-idref ctx value)) + (:IDREFS + (let ((names (split-names value))) + (unless names + (validity-error "(11) IDREF: malformed names")) + (mapc (curry #'validate-idref ctx) names))) + (:NMTOKEN + (validate-nmtoken value)) + (:NMTOKENS + (let ((tokens (split-names value))) + (unless tokens + (validity-error "(13) Name Token: malformed NMTOKENS")) + (mapc #'validate-nmtoken tokens))) + (:ENUMERATION + (unless (member value (cdr type) :test #'rod=) + (validity-error "(17) Enumeration: value not declared: ~S" + (rod-string value)))) + (:NOTATION + (unless (member value (cdr type) :test #'rod=) + (validity-error "(14) Notation Attributes: ~S" (rod-string value)))) + (:ENTITY + (validate-entity value)) + (:ENTITIES + (let ((names (split-names value))) + (unless names + (validity-error "(13) Name Token: malformed NMTOKENS")) + (mapc #'validate-entity names))) + (:CDATA)))) + +(defun validate-idref (ctx value) + (unless (valid-name-p value) + (validity-error "(11) IDREF: not a name: ~S" (rod-string value))) + (unless (gethash value (id-table ctx)) + (setf (gethash value (id-table ctx)) nil))) + +(defun validate-nmtoken (value) + (unless (valid-nmtoken-p value) + (validity-error "(13) Name Token: not a NMTOKEN: ~S" + (rod-string value)))) + +(defstruct (entdef (:constructor))) + +(defstruct (internal-entdef + (:include entdef) + (:constructor make-internal-entdef (value)) + (:conc-name #:entdef-)) + (value (error "missing argument") :type rod) + (expansion nil) + (external-subset-p *external-subset-p*)) + +(defstruct (external-entdef + (:include entdef) + (:constructor make-external-entdef (extid ndata)) + (:conc-name #:entdef-)) + (extid (error "missing argument") :type extid) + (ndata nil :type (or rod null))) + +(defun validate-entity (value) + (unless (valid-name-p value) + (validity-error "(12) Entity Name: not a name: ~S" (rod-string value))) + (let ((def (let ((*validate* + ;; Similarly the entity refs are internal and + ;; don't need normalization ... the unparsed + ;; entities (and entities) aren't "references" + ;; -- sun/valid/sa03.xml + nil)) + (get-entity-definition value :general (dtd *ctx*))))) + (unless (and (typep def 'external-entdef) (entdef-ndata def)) + ;; unparsed entity + (validity-error "(12) Entity Name: ~S" (rod-string value))))) + +(defun split-names (rod) + (flet ((whitespacep (x) + (or (rune= x #/U+0009) + (rune= x #/U+000A) + (rune= x #/U+000D) + (rune= x #/U+0020)))) + (if (let ((n (length rod))) + (and (not (zerop n)) + (or (whitespacep (rune rod 0)) + (whitespacep (rune rod (1- n)))))) + nil + (split-sequence-if #'whitespacep rod :remove-empty-subseqs t)))) + +(defun zstream-base-sysid (zstream) + (let ((base-sysid + (dolist (k (zstream-input-stack zstream)) + (let ((base-sysid (stream-name-uri (xstream-name k)))) + (when base-sysid (return base-sysid)))))) + base-sysid)) + +(defun absolute-uri (sysid source-stream) + (let ((base-sysid (zstream-base-sysid source-stream))) + ;; XXX is the IF correct? + (if base-sysid + (puri:merge-uris sysid base-sysid) + sysid))) + +(defstruct (extid (:constructor make-extid (public system))) + (public nil :type (or rod null)) + (system (error "missing argument") :type (or puri:uri null))) + +(defun absolute-extid (source-stream extid) + (let ((sysid (extid-system extid)) + (result (copy-extid extid))) + (setf (extid-system result) (absolute-uri sysid source-stream)) + result)) + +(defun define-entity (source-stream name kind def) + (setf name (intern-name name)) + (when (and sax:*namespace-processing* (find #/: name)) + (wf-error source-stream "colon in entity name")) + (let ((table + (ecase kind + (:general (dtd-gentities (dtd *ctx*))) + (:parameter (dtd-pentities (dtd *ctx*)))))) + (unless (gethash name table) + (when (and source-stream (handler *ctx*)) + (report-entity (handler *ctx*) kind name def)) + (when (typep def 'external-entdef) + (setf (entdef-extid def) + (absolute-extid source-stream (entdef-extid def)))) + (setf (gethash name table) + (cons *external-subset-p* def))))) + +(defun get-entity-definition (entity-name kind dtd) + (unless dtd + (wf-error nil "entity not defined: ~A" (rod-string entity-name))) + (destructuring-bind (extp &rest def) + (gethash entity-name + (ecase kind + (:general (dtd-gentities dtd)) + (:parameter (dtd-pentities dtd))) + '(nil)) + (when (and *validate* (standalone-p *ctx*) extp) + (validity-error "(02) Standalone Document Declaration: entity reference: ~S" + (rod-string entity-name))) + def)) + +(defun entity->xstream (zstream entity-name kind &optional internalp) + ;; `zstream' is for error messages + (let ((def (get-entity-definition entity-name kind (dtd *ctx*)))) + (unless def + (wf-error zstream "Entity '~A' is not defined." (rod-string entity-name))) + (let (r) + (etypecase def + (internal-entdef + (when (and (standalone-p *ctx*) + (entdef-external-subset-p def)) + (wf-error + zstream + "entity declared in external subset, but document is standalone")) + (setf r (make-rod-xstream (entdef-value def))) + (setf (xstream-name r) + (make-stream-name :entity-name entity-name + :entity-kind kind + :uri nil))) + (external-entdef + (when internalp + (wf-error zstream + "entity not internal: ~A" (rod-string entity-name))) + (when (entdef-ndata def) + (wf-error zstream + "reference to unparsed entity: ~A" + (rod-string entity-name))) + (setf r (xstream-open-extid (extid-using-catalog (entdef-extid def)))) + (setf (stream-name-entity-name (xstream-name r)) entity-name + (stream-name-entity-kind (xstream-name r)) kind))) + r))) + +(defun checked-get-entdef (name type) + (let ((def (get-entity-definition name type (dtd *ctx*)))) + (unless def + (wf-error nil "Entity '~A' is not defined." (rod-string name))) + def)) + +(defun xstream-open-extid (extid) + (let* ((sysid (extid-system extid)) + (stream + (or (funcall (or (entity-resolver *ctx*) (constantly nil)) + (extid-public extid) + (extid-system extid)) + (open (uri-to-pathname sysid) + :element-type '(unsigned-byte 8) + :direction :input)))) + (make-xstream stream + :name (make-stream-name :uri sysid) + :initial-speed 1))) + +(defun call-with-entity-expansion-as-stream (zstream cont name kind internalp) + ;; `zstream' is for error messages + (let ((in (entity->xstream zstream name kind internalp))) + (unwind-protect + (funcall cont in) + (close-xstream in)))) + +(defun ensure-dtd () + (unless (dtd *ctx*) + (setf (dtd *ctx*) (make-dtd)) + (define-default-entities))) + +(defun define-default-entities () + (define-entity nil #"lt" :general (make-internal-entdef #"<")) + (define-entity nil #"gt" :general (make-internal-entdef #">")) + (define-entity nil #"amp" :general (make-internal-entdef #"&")) + (define-entity nil #"apos" :general (make-internal-entdef #"'")) + (define-entity nil #"quot" :general (make-internal-entdef #"\""))) + +(defstruct attdef + ;; an attribute definition + element ;name of element this attribute belongs to + name ;name of attribute + type ;type of attribute; either one of :CDATA, :ID, :IDREF, :IDREFS, + ; :ENTITY, :ENTITIES, :NMTOKEN, :NMTOKENS, or + ; (:NOTATION <name>*) + ; (:ENUMERATION <name>*) + default ;default value of attribute: + ; :REQUIRED, :IMPLIED, (:FIXED content) or (:DEFAULT content) + (external-p *external-subset-p*) + ) + +(defstruct elmdef + ;; an element definition + name ;name of the element + content ;content model [*] + attributes ;list of defined attributes + compiled-cspec ;cons of validation function for contentspec + (external-p *external-subset-p*) + ) + +;; [*] in XML it is possible to define attributes before the element +;; itself is defined and since we hang attribute definitions into the +;; relevant element definitions, the `content' slot indicates whether an +;; element was actually defined. It is NIL until set to a content model +;; when the element type declaration is processed. + +(defun %make-rod-hash-table () + ;; XXX with portable hash tables, this is the only way to case-sensitively + ;; use rods. However, EQUALP often has horrible performance! Most Lisps + ;; provide extensions for user-defined equality, we should use them! There + ;; is also a home-made hash table for rods defined below, written by + ;; Gilbert (I think). We could also use that one, but I would prefer the + ;; first method, even if it's unportable. + (make-hash-table :test + #+rune-is-character 'equal + #-rune-is-character 'equalp)) + +(defstruct dtd + (elements (%make-rod-hash-table)) ;elmdefs + (gentities (%make-rod-hash-table)) ;general entities + (pentities (%make-rod-hash-table)) ;parameter entities + (notations (%make-rod-hash-table))) + +(defun make-dtd-cache () + (puri:make-uri-space)) + +(defvar *cache-all-dtds* nil) +(defvar *dtd-cache* (make-dtd-cache)) + +(defun remdtd (uri dtd-cache) + (setf uri (puri:intern-uri uri dtd-cache)) + (prog1 + (and (getf (puri:uri-plist uri) 'dtd) t) + (puri:unintern-uri uri dtd-cache))) + +(defun clear-dtd-cache (dtd-cache) + (puri:unintern-uri t dtd-cache)) + +(defun getdtd (uri dtd-cache) + (getf (puri:uri-plist (puri:intern-uri uri dtd-cache)) 'dtd)) + +(defun (setf getdtd) (newval uri dtd-cache) + (setf (getf (puri:uri-plist (puri:intern-uri uri dtd-cache)) 'dtd) newval) + newval) + + +;;;; + +(defun find-element (name dtd) + (gethash name (dtd-elements dtd))) + +(defun define-element (dtd element-name &optional content-model) + (let ((e (find-element element-name dtd))) + (cond + ((null e) + (setf (gethash element-name (dtd-elements dtd)) + (make-elmdef :name element-name :content content-model))) + ((null content-model) + e) + (t + (when *validate* + (when (elmdef-content e) + (validity-error "(05) Unique Element Type Declaration")) + (when (eq content-model :EMPTY) + (dolist (ad (elmdef-attributes e)) + (let ((type (attdef-type ad))) + (when (and (listp type) (eq (car type) :NOTATION)) + (validity-error "(16) No Notation on Empty Element: ~S" + (rod-string element-name))))))) + (sax:element-declaration (handler *ctx*) element-name content-model) + (setf (elmdef-content e) content-model) + (setf (elmdef-external-p e) *external-subset-p*) + e)))) + +(defvar *redefinition-warning* nil) + +(defun define-attribute (dtd element name type default) + (let ((adef (make-attdef :element element + :name name + :type type + :default default)) + (e (or (find-element element dtd) + (define-element dtd element)))) + (when (and *validate* (listp default)) + (unless (eq (attdef-type adef) :CDATA) + (setf (second default) (canon-not-cdata-attval (second default)))) + (validate-attribute* *ctx* adef (second default))) + (cond ((find-attribute e name) + (when *redefinition-warning* + (warn "Attribute \"~A\" of \"~A\" not redefined." + (rod-string name) + (rod-string element)))) + (t + (when *validate* + (when (eq type :ID) + (when (find :ID (elmdef-attributes e) :key #'attdef-type) + (validity-error "(09) One ID per Element Type: element ~A" + (rod-string element))) + (unless (member default '(:REQUIRED :IMPLIED)) + (validity-error "(10) ID Attribute Default: ~A" + (rod-string element)))) + (flet ((notationp (type) + (and (listp type) (eq (car type) :NOTATION)))) + (when (notationp type) + (when (find-if #'notationp (elmdef-attributes e) + :key #'attdef-type) + (validity-error "(15) One Notation Per Element Type: ~S" + (rod-string element))) + (when (eq (elmdef-content e) :EMPTY) + (validity-error "(16) No Notation on Empty Element: ~S" + (rod-string element)))))) + (sax:attribute-declaration (handler *ctx*) element name type default) + (push adef (elmdef-attributes e)))))) + +(defun find-attribute (elmdef name) + (find name (elmdef-attributes elmdef) :key #'attdef-name :test #'rod=)) + +(defun define-notation (dtd name id) + (let ((ns (dtd-notations dtd))) + (when (gethash name ns) + (validity-error "(24) Unique Notation Name: ~S" (rod-string name))) + (setf (gethash name ns) id))) + +(defun find-notation (name dtd) + (gethash name (dtd-notations dtd))) + +;;;; --------------------------------------------------------------------------- +;;;; z streams and lexer +;;;; + +(defstruct zstream + token-category + token-semantic + input-stack) + +(defun read-token (input) + (cond ((zstream-token-category input) + (multiple-value-prog1 + (values (zstream-token-category input) + (zstream-token-semantic input)) + (setf (zstream-token-category input) nil + (zstream-token-semantic input) nil))) + (t + (read-token-2 input)))) + +(defun peek-token (input) + (cond ((zstream-token-category input) + (values + (zstream-token-category input) + (zstream-token-semantic input))) + (t + (multiple-value-bind (c s) (read-token input) + (setf (zstream-token-category input) c + (zstream-token-semantic input) s)) + (values (zstream-token-category input) + (zstream-token-semantic input))))) + +(defun read-token-2 (input) + (cond ((null (zstream-input-stack input)) + (values :eof nil)) + (t + (let ((c (peek-rune (car (zstream-input-stack input))))) + (cond ((eq c :eof) + (cond ((eq (cadr (zstream-input-stack input)) :stop) + (values :eof nil)) + (t + (close-xstream (pop (zstream-input-stack input))) + (if (null (zstream-input-stack input)) + (values :eof nil) + (values :S nil) ;fake #x20 after PE expansion + )))) + (t + (read-token-3 input))))))) + +(defvar *data-behaviour* + ) ;either :DTD or :DOC + +(defun read-token-3 (zinput) + (let ((input (car (zstream-input-stack zinput)))) + ;; PI Comment + (let ((c (read-rune input))) + (cond + ;; first the common tokens + ((rune= #/< c) + (read-token-after-|<| zinput input)) + ;; now dispatch + (t + (ecase *data-behaviour* + (:DTD + (cond ((rune= #/\[ c) :\[) + ((rune= #/\] c) :\]) + ((rune= #/\( c) :\() + ((rune= #/\) c) :\)) + ((rune= #/\| c) :\|) + ((rune= #/\> c) :\>) + ((rune= #/\" c) :\") + ((rune= #/\' c) :\') + ((rune= #/\, c) :\,) + ((rune= #/\? c) :\?) + ((rune= #/\* c) :\*) + ((rune= #/\+ c) :\+) + ((name-rune-p c) + (unread-rune c input) + (values :nmtoken (read-name-token input))) + ((rune= #/# c) + (let ((q (read-name-token input))) + (cond ((rod= q '#.(string-rod "REQUIRED")) :|#REQUIRED|) + ((rod= q '#.(string-rod "IMPLIED")) :|#IMPLIED|) + ((rod= q '#.(string-rod "FIXED")) :|#FIXED|) + ((rod= q '#.(string-rod "PCDATA")) :|#PCDATA|) + (t + (wf-error zinput "Unknown token: ~S." q))))) + ((or (rune= c #/U+0020) + (rune= c #/U+0009) + (rune= c #/U+000D) + (rune= c #/U+000A)) + (values :S nil)) + ((rune= #/% c) + (cond ((name-start-rune-p (peek-rune input)) + ;; an entity reference + (read-pe-reference zinput)) + (t + (values :%)))) + (t + (wf-error zinput "Unexpected character ~S." c)))) + (:DOC + (cond + ((rune= c #/&) + (multiple-value-bind (kind data) (read-entity-like input) + (cond ((eq kind :ENTITY-REFERENCE) + (values :ENTITY-REF data)) + ((eq kind :CHARACTER-REFERENCE) + (values :CDATA + (with-rune-collector (collect) + (%put-unicode-char data collect))))))) + (t + (unread-rune c input) + (values :CDATA (read-cdata input))))))))))) + +(definline check-rune (input actual expected) + (unless (eql actual expected) + (wf-error input "expected #/~A but found #/~A" + (rune-char expected) + (rune-char actual)))) + +(defun read-pe-reference (zinput) + (let* ((input (car (zstream-input-stack zinput))) + (nam (read-name-token input))) + (check-rune input #/\; (read-rune input)) + (cond (*expand-pe-p* + ;; no external entities here! + (let ((i2 (entity->xstream zinput nam :parameter))) + (zstream-push i2 zinput)) + (values :S nil) ;space before inserted PE expansion. + ) + (t + (values :PE-REFERENCE nam)) ))) + +(defun read-token-after-|<| (zinput input) + (let ((d (read-rune input))) + (cond ((eq d :eof) + (eox input "EOF after '<'")) + ((rune= #/! d) + (read-token-after-|<!| input)) + ((rune= #/? d) + (multiple-value-bind (target content) (read-pi input) + (cond ((rod= target '#.(string-rod "xml")) + (values :xml-decl (cons target content))) + ((rod-equal target '#.(string-rod "XML")) + (wf-error zinput + "You lost -- no XML processing instructions.")) + ((and sax:*namespace-processing* (position #/: target)) + (wf-error zinput + "Processing instruction target ~S is not a ~ + valid NcName." + (mu target))) + (t + (values :PI (cons target content)))))) + ((rune= #// d) + (let ((c (peek-rune input))) + (cond ((name-start-rune-p c) + (read-tag-2 zinput input :etag)) + (t + (wf-error zinput + "Expecting name start rune after \"</\"."))))) + ((name-start-rune-p d) + (unread-rune d input) + (read-tag-2 zinput input :stag)) + (t + (wf-error zinput "Expected '!' or '?' after '<' in DTD."))))) + +(defun read-token-after-|<!| (input) + (let ((d (read-rune input))) + (cond ((eq d :eof) + (eox input "EOF after \"<!\".")) + ((name-start-rune-p d) + (unread-rune d input) + (let ((name (read-name-token input))) + (cond ((rod= name '#.(string-rod "ELEMENT")) :|<!ELEMENT|) + ((rod= name '#.(string-rod "ENTITY")) :|<!ENTITY|) + ((rod= name '#.(string-rod "ATTLIST")) :|<!ATTLIST|) + ((rod= name '#.(string-rod "NOTATION")) :|<!NOTATION|) + ((rod= name '#.(string-rod "DOCTYPE")) :|<!DOCTYPE|) + (t + (wf-error input"`<!~A' unknown." (rod-string name)))))) + ((rune= #/\[ d) + (values :|<![| nil)) + ((rune= #/- d) + (setf d (read-rune input)) + (cond ((rune= #/- d) + (values + :COMMENT + (read-comment-content input))) + (t + (wf-error input"Bad character ~S after \"<!-\"" d)))) + (t + (wf-error input "Bad character ~S after \"<!\"" d))))) + +(definline read-S? (input) + (while (member (peek-rune input) '(#/U+0020 #/U+0009 #/U+000A #/U+000D) + :test #'eql) + (consume-rune input))) + +(defun read-attribute-list (zinput input imagine-space-p) + (cond ((or imagine-space-p + (let ((c (peek-rune input))) + (and (not (eq c :eof)) + (space-rune-p c)))) + (read-S? input) + (cond ((eq (peek-rune input) :eof) + nil) + ((name-start-rune-p (peek-rune input)) + (cons (read-attribute zinput input) + (read-attribute-list zinput input nil))) + (t + nil))) + (t + nil))) + +(defun read-entity-like (input) + "Read an entity reference off the xstream `input'. Returns two values: + either :ENTITY-REFERENCE <interned-rod> in case of a named entity + or :CHARACTER-REFERENCE <integer> in case of character references. + The initial #\\& is considered to be consumed already." + (let ((c (peek-rune input))) + (cond ((eq c :eof) + (eox input "EOF after '&'")) + ((rune= c #/#) + (values :CHARACTER-REFERENCE (read-character-reference input))) + (t + (unless (name-start-rune-p (peek-rune input)) + (wf-error input "Expecting name after &.")) + (let ((name (read-name-token input))) + (setf c (read-rune input)) + (unless (rune= c #/\;) + (wf-error input "Expected \";\".")) + (values :ENTITY-REFERENCE name)))))) + +(defun read-tag-2 (zinput input kind) + (let ((name (read-name-token input)) + (atts nil)) + (setf atts (read-attribute-list zinput input nil)) + + ;; check for double attributes + (do ((q atts (cdr q))) + ((null q)) + (cond ((find (caar q) (cdr q) :key #'car) + (wf-error zinput "Attribute ~S has two definitions in element ~S." + (rod-string (caar q)) + (rod-string name))))) + + (cond ((eq (peek-rune input) #/>) + (consume-rune input) + (values kind (cons name atts))) + ((eq (peek-rune input) #//) + (consume-rune input) + (check-rune input #/> (read-rune input)) + (values :ztag (cons name atts))) + (t + (wf-error zinput "syntax error in read-tag-2.")) ))) + +(defun read-attribute (zinput input) + (unless (name-start-rune-p (peek-rune input)) + (wf-error zinput "Expected name.")) + ;; arg thanks to the post mortem nature of name space declarations, + ;; we could only process the attribute values post mortem. + (let ((name (read-name-token input))) + (while (let ((c (peek-rune input))) + (and (not (eq c :eof)) + (or (rune= c #/U+0020) + (rune= c #/U+0009) + (rune= c #/U+000A) + (rune= c #/U+000D)))) + (consume-rune input)) + (unless (eq (read-rune input) #/=) + (wf-error zinput "Expected \"=\".")) + (while (let ((c (peek-rune input))) + (and (not (eq c :eof)) + (or (rune= c #/U+0020) + (rune= c #/U+0009) + (rune= c #/U+000A) + (rune= c #/U+000D)))) + (consume-rune input)) + (cons name (read-att-value-2 input)))) + +(defun canon-not-cdata-attval (value) + ;; | If the declared value is not CDATA, then the XML processor must + ;; | further process the normalized attribute value by discarding any + ;; | leading and trailing space (#x20) characters, and by replacing + ;; | sequences of space (#x20) characters by a single space (#x20) + ;; | character. + (with-rune-collector (collect) + (let ((gimme-20 nil) + (anything-seen-p nil)) + (map nil (lambda (c) + (cond ((rune= c #/u+0020) + (setf gimme-20 t)) + (t + (when (and anything-seen-p gimme-20) + (collect #/u+0020)) + (setf gimme-20 nil) + (setf anything-seen-p t) + (collect c)))) + value)))) + +(definline data-rune-p (rune) + ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + ;; + ;; FIXME: das halte ich fuer verkehrt. Surrogates als Unicode-Zeichen + ;; sind verboten. Das liegt hier aber nicht vor, denn wir arbeiten + ;; ja tatsaechlich mit UTF-16. Verboten ist es nur, wenn wir ein + ;; solches Zeichen beim Dekodieren finden, das wird aber eben + ;; in encodings.lisp bereits geprueft. --david + (let ((c (rune-code rune))) + (or (= c #x9) (= c #xA) (= c #xD) + (<= #x20 c #xD7FF) + (<= #xE000 c #xFFFD) + (<= #xD800 c #xDBFF) + (<= #xDC00 c #xDFFF)))) + +(defun read-att-value (zinput input mode &optional canon-space-p (delim nil)) + (with-rune-collector-2 (collect) + (labels ((muffle (input delim) + (let (c) + (loop + (setf c (read-rune input)) + (cond ((eql delim c) + (return)) + ((eq c :eof) + (eox input "EOF")) + ((rune= c #/&) + (setf c (peek-rune input)) + (cond ((eql c :eof) + (eox input)) + ((rune= c #/#) + (let ((c (read-character-reference input))) + (%put-unicode-char c collect))) + (t + (unless (name-start-rune-p (peek-rune input)) + (wf-error zinput "Expecting name after &.")) + (let ((name (read-name-token input))) + (setf c (read-rune input)) + (check-rune input c #/\;) + (ecase mode + (:ATT + (recurse-on-entity + zinput name :general + (lambda (zinput) + (muffle (car (zstream-input-stack zinput)) + :eof)) + t)) + (:ENT + ;; bypass, but never the less we + ;; need to check for legal + ;; syntax. + ;; Must it be defined? + ;; allerdings: unparsed sind verboten + (collect #/&) + (map nil (lambda (x) (collect x)) name) + (collect #/\; ))))))) + ((and (eq mode :ENT) (rune= c #/%)) + (let ((d (peek-rune input))) + (when (eq d :eof) + (eox input)) + (unless (name-start-rune-p d) + (wf-error zinput "Expecting name after %."))) + (let ((name (read-name-token input))) + (setf c (read-rune input)) + (check-rune input c #/\;) + (cond (*expand-pe-p* + (recurse-on-entity + zinput name :parameter + (lambda (zinput) + (muffle (car (zstream-input-stack zinput)) + :eof)))) + (t + (wf-error zinput "No PE here."))))) + ((and (eq mode :ATT) (rune= c #/<)) + (wf-error zinput "unexpected #\/<")) + ((and canon-space-p (space-rune-p c)) + (collect #/space)) + ((not (data-rune-p c)) + (wf-error zinput "illegal char: ~S." c)) + (t + (collect c))))))) + (declare (dynamic-extent #'muffle)) + (muffle input (or delim + (let ((delim (read-rune input))) + (unless (member delim '(#/\" #/\') :test #'eql) + (wf-error zinput "invalid attribute delimiter")) + delim)))))) + +(defun read-character-reference (input) + ;; The #/& is already read + (let ((res + (let ((c (read-rune input))) + (check-rune input c #/#) + (setq c (read-rune input)) + (cond ((eql c :eof) + (eox input)) + ((eql c #/x) + ;; hexadecimal + (setq c (read-rune input)) + (when (eql c :eof) + (eox input)) + (unless (digit-rune-p c 16) + (wf-error input "garbage in character reference")) + (prog1 + (parse-integer + (with-output-to-string (sink) + (write-char (rune-char c) sink) + (while (progn + (setq c (read-rune input)) + (when (eql c :eof) + (eox input)) + (digit-rune-p c 16)) + (write-char (rune-char c) sink))) + :radix 16) + (check-rune input c #/\;))) + ((rune<= #/0 c #/9) + ;; decimal + (prog1 + (parse-integer + (with-output-to-string (sink) + (write-char (rune-char c) sink) + (while (progn + (setq c (read-rune input)) + (when (eql c :eof) + (eox input)) + (rune<= #/0 c #/9)) + (write-char (rune-char c) sink))) + :radix 10) + (check-rune input c #/\;))) + (t + (wf-error input "Bad char in numeric character entity.")))))) + (unless (code-data-char-p res) + (wf-error + input + "expansion of numeric character reference (#x~X) is no data char." + res)) + res)) + +(defun read-pi (input) + ;; "<?" is already read + (let (name) + (let ((c (peek-rune input))) + (unless (name-start-rune-p c) + (wf-error input "Expecting name after '<?'")) + (setf name (read-name-token input))) + (cond + ((member (peek-rune input) '(#/U+0020 #/U+0009 #/U+000A #/U+000D) + :test #'eql) + (values name (read-pi-content input))) + (t + (unless (and (eql (read-rune input) #/?) + (eql (read-rune input) #/>)) + (wf-error input "malformed processing instruction")) + (values name ""))))) + +(defun read-pi-content (input) + (read-S? input) + (let (d) + (with-rune-collector (collect) + (block nil + (tagbody + state-1 + (setf d (read-rune input)) + (when (eq d :eof) + (eox input)) + (unless (data-rune-p d) + (wf-error input "Illegal char: ~S." d)) + (when (rune= d #/?) (go state-2)) + (collect d) + (go state-1) + state-2 ;; #/? seen + (setf d (read-rune input)) + (when (eq d :eof) + (eox input)) + (unless (data-rune-p d) + (wf-error input "Illegal char: ~S." d)) + (when (rune= d #/>) (return)) + (when (rune= d #/?) + (collect #/?) + (go state-2)) + (collect #/?) + (collect d) + (go state-1)))))) + +(defun read-comment-content (input &aux d) + (with-rune-collector (collect) + (block nil + (tagbody + state-1 + (setf d (read-rune input)) + (when (eq d :eof) + (eox input)) + (unless (data-rune-p d) + (wf-error input "Illegal char: ~S." d)) + (when (rune= d #/-) (go state-2)) + (collect d) + (go state-1) + state-2 ;; #/- seen + (setf d (read-rune input)) + (when (eq d :eof) + (eox input)) + (unless (data-rune-p d) + (wf-error input "Illegal char: ~S." d)) + (when (rune= d #/-) (go state-3)) + (collect #/-) + (collect d) + (go state-1) + state-3 ;; #/- #/- seen + (setf d (read-rune input)) + (when (eq d :eof) + (eox input)) + (unless (data-rune-p d) + (wf-error input "Illegal char: ~S." d)) + (when (rune= d #/>) (return)) + (wf-error input "'--' not allowed in a comment") + (when (rune= d #/-) + (collect #/-) + (go state-3)) + (collect #/-) + (collect #/-) + (collect d) + (go state-1))))) + +(defun read-cdata-sect (input &aux d) + ;; <![CDATA[ is already read + ;; read anything up to ]]> + (with-rune-collector (collect) + (block nil + (tagbody + state-1 + (setf d (read-rune input)) + (when (eq d :eof) + (eox input)) + (unless (data-rune-p d) + (wf-error input "Illegal char: ~S." d)) + (when (rune= d #/\]) (go state-2)) + (collect d) + (go state-1) + state-2 ;; #/] seen + (setf d (read-rune input)) + (when (eq d :eof) + (eox input)) + (unless (data-rune-p d) + (wf-error input "Illegal char: ~S." d)) + (when (rune= d #/\]) (go state-3)) + (collect #/\]) + (collect d) + (go state-1) + state-3 ;; #/\] #/\] seen + (setf d (read-rune input)) + (when (eq d :eof) + (eox input)) + (unless (data-rune-p d) + (wf-error input "Illegal char: ~S." d)) + (when (rune= d #/>) + (return)) + (when (rune= d #/\]) + (collect #/\]) + (go state-3)) + (collect #/\]) + (collect #/\]) + (collect d) + (go state-1))))) + +;; some character categories + +(defun space-rune-p (rune) + (declare (type rune rune)) + (or (rune= rune #/U+0020) + (rune= rune #/U+0009) + (rune= rune #/U+000A) + (rune= rune #/U+000D))) + +(defun code-data-char-p (c) + ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + (or (= c #x9) (= c #xA) (= c #xD) + (<= #x20 c #xD7FF) + (<= #xE000 c #xFFFD) + (<= #x10000 c #x10FFFF))) + +(defun pubid-char-p (c) + (or (rune= c #/u+0020) (rune= c #/u+000D) (rune= c #/u+000A) + (rune<= #/a c #/z) + (rune<= #/A c #/Z) + (rune<= #/0 c #/9) + (member c '(#/- #/' #/\( #/\) #/+ #/, #/. #// + #/: #/= #/? #/\; #/! #/* #/# + #/@ #/$ #/_ #/%)))) + + +(defun expect (input category) + (multiple-value-bind (cat sem) (read-token input) + (unless (eq cat category) + (wf-error input "Expected ~S saw ~S [~S]" category cat sem)) + (values cat sem))) + +(defun consume-token (input) + (read-token input)) + +;;;; --------------------------------------------------------------------------- +;;;; Parser +;;;; + +(defun p/S (input) + ;; S ::= (#x20 | #x9 | #xD | #xA)+ + (expect input :S) + (while (eq (peek-token input) :S) + (consume-token input))) + +(defun p/S? (input) + ;; S ::= (#x20 | #x9 | #xD | #xA)+ + (while (eq (peek-token input) :S) + (consume-token input))) + +(defun p/nmtoken (input) + (nth-value 1 (expect input :nmtoken))) + +(defun p/name (input) + (let ((result (p/nmtoken input))) + (unless (name-start-rune-p (elt result 0)) + (wf-error input "Expected name.")) + result)) + +(defun p/attlist-decl (input) + ;; [52] AttlistDecl ::= '<!ATTLIST' S Name (S AttDef)* S? '>' + (let (elm-name) + (expect input :|<!ATTLIST|) + (p/S input) + (setf elm-name (p/nmtoken input)) + (loop + (let ((tok (read-token input))) + (case tok + (:S + (p/S? input) + (cond ((eq (peek-token input) :>) + (consume-token input) + (return)) + (t + (multiple-value-bind (name type default) (p/attdef input) + (define-attribute (dtd *ctx*) elm-name name type default)) ))) + (:> + (return)) + (otherwise + (wf-error input + "Expected either another AttDef or end of \"<!ATTLIST\". -- saw ~S." + tok))))))) + +(defun p/attdef (input) + ;; [53] AttDef ::= Name S AttType S DefaultDecl + (let (name type default) + (setf name (p/nmtoken input)) + (p/S input) + (setf type (p/att-type input)) + (p/S input) + (setf default (p/default-decl input)) + (values name type default))) + +(defun p/list (input item-parser delimiter) + ;; Parse something like S? <item> (S? <delimiter> <item>)* S? + ;; + (declare (type function item-parser)) + (let (res) + (p/S? input) + (setf res (list (funcall item-parser input))) + (loop + (p/S? input) + (cond ((eq (peek-token input) delimiter) + (consume-token input) + (p/S? input) + (push (funcall item-parser input) res)) + (t + (return)))) + (p/S? input) + (reverse res))) + +(defun p/att-type (input) + ;; [54] AttType ::= StringType | TokenizedType | EnumeratedType + ;; [55] StringType ::= 'CDATA' + ;; [56] TokenizedType ::= 'ID' /*VC: ID */ + ;; /*VC: One ID per Element Type */ + ;; /*VC: ID Attribute Default */ + ;; | 'IDREF' /*VC: IDREF */ + ;; | 'IDREFS' /*VC: IDREF */ + ;; | 'ENTITY' /*VC: Entity Name */ + ;; | 'ENTITIES' /*VC: Entity Name */ + ;; | 'NMTOKEN' /*VC: Name Token */ + ;; | 'NMTOKENS' /*VC: Name Token */ + ;; [57] EnumeratedType ::= NotationType | Enumeration + ;; [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' + ;; /* VC: Notation Attributes */ + ;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* VC: Enumeration */ + (multiple-value-bind (cat sem) (read-token input) + (cond ((eq cat :nmtoken) + (cond ((rod= sem '#.(string-rod "CDATA")) :CDATA) + ((rod= sem '#.(string-rod "ID")) :ID) + ((rod= sem '#.(string-rod "IDREF")) :IDREFS) + ((rod= sem '#.(string-rod "IDREFS")) :IDREFS) + ((rod= sem '#.(string-rod "ENTITY")) :ENTITY) + ((rod= sem '#.(string-rod "ENTITIES")) :ENTITIES) + ((rod= sem '#.(string-rod "NMTOKEN")) :NMTOKEN) + ((rod= sem '#.(string-rod "NMTOKENS")) :NMTOKENS) + ((rod= sem '#.(string-rod "NOTATION")) + (let (names) + (p/S input) + (expect input :\() + (setf names (p/list input #'p/nmtoken :\| )) + (expect input :\)) + (when *validate* + (setf (referenced-notations *ctx*) + (append names (referenced-notations *ctx*)))) + (cons :NOTATION names))) + (t + (wf-error input "In p/att-type: ~S ~S." cat sem)))) + ((eq cat :\() + ;; XXX Die Nmtoken-Syntax pruefen wir derzeit nur beim Validieren. + (let (names) + ;;(expect input :\() + (setf names (p/list input #'p/nmtoken :\| )) + (expect input :\)) + (cons :ENUMERATION names))) + (t + (wf-error input "In p/att-type: ~S ~S." cat sem)) ))) + +(defun p/default-decl (input) + ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' + ;; | (('#FIXED' S)? AttValue) /* VC: Required Attribute */ + ;; + ;; /* VC: Attribute Default Legal */ + ;; /* WFC: No < in Attribute Values */ + ;; /* VC: Fixed Attribute Default */ + (multiple-value-bind (cat sem) (peek-token input) + (cond ((eq cat :|#REQUIRED|) + (consume-token input) :REQUIRED) + ((eq cat :|#IMPLIED|) + (consume-token input) :IMPLIED) + ((eq cat :|#FIXED|) + (consume-token input) + (p/S input) + (list :FIXED (p/att-value input))) + ((or (eq cat :\') (eq cat :\")) + (list :DEFAULT (p/att-value input))) + (t + (wf-error input "p/default-decl: ~S ~S." cat sem)) ))) +;;;; + +;; [70] EntityDecl ::= GEDecl | PEDecl +;; [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' +;; [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' +;; [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) +;; [74] PEDef ::= EntityValue | ExternalID +;; [75] ExternalID ::= 'SYSTEM' S SystemLiteral +;; | 'PUBLIC' S PubidLiteral S SystemLiteral +;; [76] NDataDecl ::= S 'NDATA' S Name /* VC: Notation Declared */ + +(defun p/entity-decl (input) + (let (name def kind) + (expect input :|<!ENTITY|) + (p/S input) + (cond ((eq (peek-token input) :%) + (setf kind :parameter) + (consume-token input) + (p/S input)) + (t + (setf kind :general))) + (setf name (p/name input)) + (p/S input) + (setf def (p/entity-def input kind)) + (define-entity input name kind def) + (p/S? input) + (expect input :\>))) + +(defun report-entity (h kind name def) + (etypecase def + (external-entdef + (let ((extid (entdef-extid def)) + (ndata (entdef-ndata def))) + (if ndata + (sax:unparsed-entity-declaration h + name + (extid-public extid) + (uri-rod (extid-system extid)) + ndata) + (sax:external-entity-declaration h + kind + name + (extid-public extid) + (uri-rod (extid-system extid)))))) + (internal-entdef + (sax:internal-entity-declaration h kind name (entdef-value def))))) + +(defun p/entity-def (input kind) + (multiple-value-bind (cat sem) (peek-token input) + (cond ((member cat '(:\" :\')) + (make-internal-entdef (p/entity-value input))) + ((and (eq cat :nmtoken) + (or (rod= sem '#.(string-rod "SYSTEM")) + (rod= sem '#.(string-rod "PUBLIC")))) + (let (extid ndata) + (setf extid (p/external-id input nil)) + (when (eq kind :general) ;NDATA allowed at all? + (cond ((eq (peek-token input) :S) + (p/S? input) + (when (and (eq (peek-token input) :nmtoken) + (rod= (nth-value 1 (peek-token input)) + '#.(string-rod "NDATA"))) + (consume-token input) + (p/S input) + (setf ndata (p/nmtoken input)) + (when *validate* + (push ndata (referenced-notations *ctx*))))))) + (make-external-entdef extid ndata))) + (t + (wf-error input "p/entity-def: ~S / ~S." cat sem)) ))) + +(defun p/entity-value (input) + (let ((delim (if (eq (read-token input) :\") #/\" #/\'))) + (read-att-value input + (car (zstream-input-stack input)) + :ENT + nil + delim))) + +(defun p/att-value (input) + (let ((delim (if (eq (read-token input) :\") #/\" #/\'))) + (read-att-value input + (car (zstream-input-stack input)) + :ATT + t + delim))) + +(defun p/external-id (input &optional (public-only-ok-p nil)) + ;; xxx public-only-ok-p + (multiple-value-bind (cat sem) (read-token input) + (cond ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "SYSTEM"))) + (p/S input) + (make-extid nil (p/system-literal input))) + ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "PUBLIC"))) + (let (pub sys) + (p/S input) + (setf pub (p/pubid-literal input)) + (when (eq (peek-token input) :S) + (p/S input) + (when (member (peek-token input) '(:\" :\')) + (setf sys (p/system-literal input)))) + (when (and (not public-only-ok-p) + (null sys)) + (wf-error input "System identifier needed for this PUBLIC external identifier.")) + (make-extid pub sys))) + (t + (wf-error input "Expected external-id: ~S / ~S." cat sem))))) + + +;; [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") +;; [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" +;; [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] +;; | [-'()+,./:=?;!*#@$_%] + +(defun p/id (input) + (multiple-value-bind (cat) (read-token input) + (cond ((member cat '(:\" :\')) + (let ((delim (if (eq cat :\") #/\" #/\'))) + (with-rune-collector (collect) + (loop + (let ((c (read-rune (car (zstream-input-stack input))))) + (cond ((eq c :eof) + (eox input "EOF in system literal.")) + ((rune= c delim) + (return)) + (t + (collect c)))))))) + (t + (wf-error input "Expect either \" or \'."))))) + +;; it is important to cache the orginal URI rod, since the re-serialized +;; uri-string can be different from the one parsed originally. +(defun uri-rod (uri) + (if uri + (or (getf (puri:uri-plist uri) 'original-rod) + (rod (puri:render-uri uri nil))) + nil)) + +(defun safe-parse-uri (str) + ;; puri doesn't like strings starting with file:///, although that is a very + ;; common is practise. Cut it away, we don't distinguish between scheme + ;; :FILE and NIL anway. + (when (eql (search "file://" str) 0) + (setf str (subseq str (length "file://")))) + (puri:parse-uri (coerce str 'simple-string))) + +(defun p/system-literal (input) + (let* ((rod (p/id input)) + (result (safe-parse-uri (rod-string rod)))) + (setf (getf (puri:uri-plist result) 'original-rod) rod) + result)) + +(defun p/pubid-literal (input) + (let ((result (p/id input))) + (unless (every #'pubid-char-p result) + (wf-error input "Illegal pubid: ~S." (rod-string result))) + result)) + + +;;;; + +(defun p/element-decl (input) + (let (name content) + (expect input :|<!ELEMENT|) + (p/S input) + (setf name (p/nmtoken input)) + (p/S input) + (setf content (normalize-mixed-cspec (p/cspec input))) + (unless (legal-content-model-p content *validate*) + (wf-error input "Malformed or invalid content model: ~S." (mu content))) + (p/S? input) + (expect input :\>) + (when *validate* + (define-element (dtd *ctx*) name content)) + (list :element name content))) + +(defun maybe-compile-cspec (e) + (or (elmdef-compiled-cspec e) + (setf (elmdef-compiled-cspec e) + (let ((cspec (elmdef-content e))) + (unless cspec + (validity-error "(03) Element Valid: no definition for ~A" + (rod-string (elmdef-name e)))) + (multiple-value-call #'cons + (compile-cspec cspec (standalone-check-necessary-p e))))))) + +(defun make-root-model (name) + (cons (lambda (actual-name) + (if (rod= actual-name name) + (constantly :dummy) + nil)) + (constantly t))) + +;;; content spec validation: +;;; +;;; Given a `contentspec', COMPILE-CSPEC returns as multiple values two +;;; functions A and B of one argument to be called for every +;;; A. child element +;;; B. text child node +;;; +;;; Function A will be called with +;;; - the element name rod as its argument. If that element may appear +;;; at the current position, a new function to be called for the next +;;; child is returned. Otherwise NIL is returned. +;;; - argument NIL at the end of the element, it must then return T or NIL +;;; to indicate whether the end tag is valid. +;;; +;;; Function B will be called with the character data rod as its argument, it +;;; returns a boolean indicating whether this text node is allowed. +;;; +;;; That is, if one of the functions ever returns NIL, the node is +;;; rejected as invalid. + +(defun cmodel-done (actual-value) + (null actual-value)) + +(defun compile-cspec (cspec &optional standalone-check) + (cond + ((atom cspec) + (ecase cspec + (:EMPTY (values #'cmodel-done (constantly nil))) + (:PCDATA (values #'cmodel-done (constantly t))) + (:ANY + (values (labels ((doit (name) (if name #'doit t))) #'doit) + (constantly t))))) + ((and (eq (car cspec) '*) + (let ((subspec (second cspec))) + (and (eq (car subspec) 'or) (eq (cadr subspec) :PCDATA)))) + (values (compile-mixed (second cspec)) + (constantly t))) + (t + (values (compile-content-model cspec) + (lambda (rod) + (when standalone-check + (validity-error "(02) Standalone Document Declaration: whitespace")) + (every #'white-space-rune-p rod)))))) + +(defun compile-mixed (cspec) + ;; das koennten wir theoretisch auch COMPILE-CONTENT-MODEL erledigen lassen + (let ((allowed-names (cddr cspec))) + (labels ((doit (actual-name) + (cond + ((null actual-name) t) + ((member actual-name allowed-names :test #'rod=) #'doit) + (t nil)))) + #'doit))) + +(defun compile-content-model (cspec &optional (continuation #'cmodel-done)) + (if (vectorp cspec) + (lambda (actual-name) + (if (and actual-name (rod= cspec actual-name)) + continuation + nil)) + (ecase (car cspec) + (and + (labels ((traverse (seq) + (compile-content-model (car seq) + (if (cdr seq) + (traverse (cdr seq)) + continuation)))) + (traverse (cdr cspec)))) + (or + (let ((options (mapcar (rcurry #'compile-content-model continuation) + (cdr cspec)))) + (lambda (actual-name) + (some (rcurry #'funcall actual-name) options)))) + (? + (let ((maybe (compile-content-model (second cspec) continuation))) + (lambda (actual-name) + (or (funcall maybe actual-name) + (funcall continuation actual-name))))) + (* + (let (maybe-continuation) + (labels ((recurse (actual-name) + (if (null actual-name) + (funcall continuation actual-name) + (or (funcall maybe-continuation actual-name) + (funcall continuation actual-name))))) + (setf maybe-continuation + (compile-content-model (second cspec) #'recurse)) + #'recurse))) + (+ + (let ((it (cadr cspec))) + (compile-content-model `(and ,it (* ,it)) continuation)))))) + +(defun setp (list &key (test 'eql)) + (equal list (remove-duplicates list :test test))) + +(defun legal-content-model-p (cspec &optional validate) + (or (eq cspec :PCDATA) + (eq cspec :ANY) + (eq cspec :EMPTY) + (and (consp cspec) + (eq (car cspec) '*) + (consp (cadr cspec)) + (eq (car (cadr cspec)) 'or) + (eq (cadr (cadr cspec)) :PCDATA) + (every #'vectorp (cddr (cadr cspec))) + (if (and validate (not (setp (cddr (cadr cspec)) :test #'rod=))) + (validity-error "VC: No Duplicate Types (07)") + t)) + (labels ((walk (x) + (cond ((member x '(:PCDATA :ANY :EMPTY)) + nil) + ((atom x) t) + ((and (walk (car x)) + (walk (cdr x))))))) + (walk cspec)))) + +;; wir fahren besser, wenn wir machen: + +;; cspec ::= 'EMPTY' | 'ANY' | '#PCDATA' +;; | Name +;; | cs +;; cs ::= '(' S? cspec ( S? '|' S? cspec)* S? ')' ('?' | '*' | '+')? +;; und eine post factum analyse + +(defun p/cspec (input &optional recursivep) + (let ((term + (let ((names nil) op-cat op res stream) + (multiple-value-bind (cat sem) (peek-token input) + (cond ((eq cat :nmtoken) + (consume-token input) + (cond ((rod= sem '#.(string-rod "EMPTY")) + :EMPTY) + ((rod= sem '#.(string-rod "ANY")) + :ANY) + ((not recursivep) + (wf-error input "invalid content spec")) + (t + sem))) + ((eq cat :\#PCDATA) + (consume-token input) + :PCDATA) + ((eq cat :\() + (setf stream (car (zstream-input-stack input))) + (consume-token input) + (p/S? input) + (setq names (list (p/cspec input t))) + (p/S? input) + (cond ((member (peek-token input) '(:\| :\,)) + (setf op-cat (peek-token input)) + (setf op (if (eq op-cat :\,) 'and 'or)) + (while (eq (peek-token input) op-cat) + (consume-token input) + (p/S? input) + (push (p/cspec input t) names) + (p/S? input)) + (setf res (cons op (reverse names)))) + (t + (setf res (cons 'and names)))) + (p/S? input) + (expect input :\)) + (when *validate* + (unless (eq stream (car (zstream-input-stack input))) + (validity-error "(06) Proper Group/PE Nesting"))) + res) + (t + (wf-error input "p/cspec - ~s / ~s" cat sem))))))) + (cond ((eq (peek-token input) :?) (consume-token input) (list '? term)) + ((eq (peek-token input) :+) (consume-token input) (list '+ term)) + ((eq (peek-token input) :*) (consume-token input) (list '* term)) + (t + term)))) + +(defun normalize-mixed-cspec (cspec) + ;; der Parser oben funktioniert huebsch fuer die children-Regel, aber + ;; fuer Mixed ist das Ergebnis nicht praktisch, denn dort wollen wir + ;; eigentlich auf eine Liste von Namen in einheitlichem Format hinaus. + ;; Dazu normalisieren wir einfach in eine der beiden folgenden Formen: + ;; (* (or :PCDATA ...rods...)) -- und zwar exakt so! + ;; :PCDATA -- sonst ganz trivial + (flet ((trivialp (c) + (and (consp c) + (and (eq (car c) 'and) + (eq (cadr c) :PCDATA) + (null (cddr c)))))) + (if (or (trivialp cspec) ;(and PCDATA) + (and (consp cspec) ;(* (and PCDATA)) + (and (eq (car cspec) '*) + (null (cddr cspec)) + (trivialp (cadr cspec))))) + :PCDATA + cspec))) + +;; [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' + + +;; [52] AttlistDecl ::= '<!ATTLIST' S Name AttDefs S? '>' +;; [52] AttlistDecl ::= '<!ATTLIST' S Name S? '>' +;; [53] AttDefs ::= S Name S AttType S DefaultDecl AttDefs +;; [53] AttDefs ::= + +(defun p/notation-decl (input) + (let (name id) + (expect input :|<!NOTATION|) + (p/S input) + (setf name (p/name input)) + (p/S input) + (setf id (p/external-id input t)) + (p/S? input) + (expect input :\>) + (sax:notation-declaration (handler *ctx*) + name + (if (extid-public id) + (normalize-public-id (extid-public id)) + nil) + (uri-rod (extid-system id))) + (when (and sax:*namespace-processing* (find #/: name)) + (wf-error input "colon in notation name")) + (when *validate* + (define-notation (dtd *ctx*) name id)) + (list :notation-decl name id))) + +(defun normalize-public-id (rod) + (with-rune-collector (collect) + (let ((gimme-20 nil) + (anything-seen-p nil)) + (map nil (lambda (c) + (cond + ((or (rune= c #/u+0009) + (rune= c #/u+000A) + (rune= c #/u+000D) + (rune= c #/u+0020)) + (setf gimme-20 t)) + (t + (when (and anything-seen-p gimme-20) + (collect #/u+0020)) + (setf gimme-20 nil) + (setf anything-seen-p t) + (collect c)))) + rod)))) + +;;; + +(defun p/conditional-sect (input) + (expect input :<!\[ ) + (let ((stream (car (zstream-input-stack input)))) + (p/S? input) + (multiple-value-bind (cat sem) (read-token input) + (cond ((and (eq cat :nmtoken) + (rod= sem '#.(string-rod "INCLUDE"))) + (p/include-sect input stream)) + ((and (eq cat :nmtoken) + (rod= sem '#.(string-rod "IGNORE"))) + (p/ignore-sect input stream)) + (t + (wf-error input "Expected INCLUDE or IGNORE after \"<![\".")))))) + +(defun p/cond-expect (input cat initial-stream) + (expect input cat) + (when *validate* + (unless (eq (car (zstream-input-stack input)) initial-stream) + (validity-error "(21) Proper Conditional Section/PE Nesting")))) + +(defun p/include-sect (input initial-stream) + ;; <![INCLUDE is already read. + (p/S? input) + (p/cond-expect input :\[ initial-stream) + (p/ext-subset-decl input) + (p/cond-expect input :\] initial-stream) + (p/cond-expect input :\] initial-stream) + (p/cond-expect input :\> initial-stream)) + +(defun p/ignore-sect (input initial-stream) + ;; <![IGNORE is already read. + ;; XXX Is VC 21 being checked for nested sections? + (p/S? input) + (p/cond-expect input :\[ initial-stream) + (let ((input (car (zstream-input-stack input)))) + (let ((level 0)) + (do ((c1 (read-rune input) (read-rune input)) + (c2 #/U+0000 c1) + (c3 #/U+0000 c2)) + ((= level -1)) + (declare (type fixnum level)) + (cond ((eq c1 :eof) + (eox input "EOF in <![IGNORE ... >"))) + (cond ((and (rune= c3 #/<) (rune= c2 #/!) (rune= c1 #/\[)) + (incf level))) + (cond ((and (rune= c3 #/\]) (rune= c2 #/\]) (rune= c1 #/>)) + (decf level))) ))) + (unless (eq (car (zstream-input-stack input)) initial-stream) + (validity-error "(21) Proper Conditional Section/PE Nesting"))) + +(defun p/ext-subset-decl (input) + ;; ( markupdecl | conditionalSect | S )* + (loop + (case (let ((*expand-pe-p* nil)) (peek-token input)) + (:|<![| (let ((*expand-pe-p* t)) (p/conditional-sect input))) + (:S (consume-token input)) + (:eof (return)) + ((:|<!ELEMENT| :|<!ATTLIST| :|<!ENTITY| :|<!NOTATION| :PI :COMMENT) + (let ((*expand-pe-p* t) + (*external-subset-p* t)) + (p/markup-decl input))) + ((:PE-REFERENCE) + (let ((name (nth-value 1 (read-token input)))) + (recurse-on-entity input name :parameter + (lambda (input) + (etypecase (checked-get-entdef name :parameter) + (external-entdef + (p/ext-subset input)) + (internal-entdef + (p/ext-subset-decl input))) + (unless (eq :eof (peek-token input)) + (wf-error input "Trailing garbage.")))))) + (otherwise (return)))) ) + +(defun p/markup-decl (input) + (peek-token input) + (let ((stream (car (zstream-input-stack input)))) + (multiple-value-prog1 + (p/markup-decl-unsafe input) + (when *validate* + (unless (eq stream (car (zstream-input-stack input))) + (validity-error "(01) Proper Declaration/PE Nesting")))))) + +(defun p/markup-decl-unsafe (input) + ;; markupdecl ::= elementdecl | AttlistDecl /* VC: Proper Declaration/PE Nesting */ + ;; | EntityDecl | NotationDecl + ;; | PI | Comment /* WFC: PEs in Internal Subset */ + (let ((token (peek-token input)) + (*expand-pe-p* (and *expand-pe-p* *external-subset-p*))) + (case token + (:|<!ELEMENT| (p/element-decl input)) + (:|<!ATTLIST| (p/attlist-decl input)) + (:|<!ENTITY| (p/entity-decl input)) + (:|<!NOTATION| (p/notation-decl input)) + (:PI + (let ((sem (nth-value 1 (read-token input)))) + (sax:processing-instruction (handler *ctx*) (car sem) (cdr sem)))) + (:COMMENT (consume-token input)) + (otherwise + (wf-error input "p/markup-decl ~S" (peek-token input)))))) + +(defun setup-encoding (input xml-header) + (when (xml-header-encoding xml-header) + (let ((enc (find-encoding (xml-header-encoding xml-header)))) + (cond (enc + (setf (xstream-encoding (car (zstream-input-stack input))) + enc)) + (t + (warn "There is no such encoding: ~S." (xml-header-encoding xml-header))))))) + +(defun set-full-speed (input) + (let ((xstream (car (zstream-input-stack input)))) + (when xstream + (set-to-full-speed xstream)))) + +(defun p/ext-subset (input) + (cond ((eq (peek-token input) :xml-decl) + (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input)))))) + (setup-encoding input hd)) + (consume-token input))) + (set-full-speed input) + (p/ext-subset-decl input) + (unless (eq (peek-token input) :eof) + (wf-error input "Trailing garbage - ~S." (peek-token input)))) + +(defvar *catalog* nil) + +(defun extid-using-catalog (extid) + (if *catalog* + (let ((sysid + (resolve-extid (extid-public extid) + (extid-system extid) + *catalog*))) + (if sysid + (make-extid nil sysid) + extid)) + extid)) + +(defun p/doctype-decl (input &optional dtd-extid) + (let () + (let ((*expand-pe-p* nil) + name extid) + (expect input :|<!DOCTYPE|) + (p/S input) + (setq name (p/nmtoken input)) + (when *validate* + (setf (model-stack *ctx*) (list (make-root-model name)))) + (when (eq (peek-token input) :S) + (p/S input) + (unless (or (eq (peek-token input) :\[ ) + (eq (peek-token input) :\> )) + (setf extid (p/external-id input t)))) + (when dtd-extid + (setf extid dtd-extid)) + (p/S? input) + (sax:start-dtd (handler *ctx*) + name + (and extid (extid-public extid)) + (and extid (uri-rod (extid-system extid)))) + (when (eq (peek-token input) :\[ ) + (when (disallow-internal-subset *ctx*) + (wf-error input "document includes an internal subset")) + (ensure-dtd) + (consume-token input) + (sax:start-internal-subset (handler *ctx*)) + (while (progn (p/S? input) + (not (eq (peek-token input) :\] ))) + (if (eq (peek-token input) :PE-REFERENCE) + (let ((name (nth-value 1 (read-token input)))) + (recurse-on-entity input name :parameter + (lambda (input) + (etypecase (checked-get-entdef name :parameter) + (external-entdef + (p/ext-subset input)) + (internal-entdef + (p/ext-subset-decl input))) + (unless (eq :eof (peek-token input)) + (wf-error input "Trailing garbage."))))) + (let ((*expand-pe-p* t)) + (p/markup-decl input)))) + (consume-token input) + (sax:end-internal-subset (handler *ctx*)) + (p/S? input)) + (expect input :>) + (when extid + (let* ((effective-extid + (extid-using-catalog (absolute-extid input extid))) + (sysid (extid-system effective-extid)) + (fresh-dtd-p (null (dtd *ctx*))) + (cached-dtd + (and fresh-dtd-p + (not (standalone-p *ctx*)) + (getdtd sysid *dtd-cache*)))) + (cond + (cached-dtd + (setf (dtd *ctx*) cached-dtd) + (report-cached-dtd cached-dtd)) + (t + (let* ((xi2 (xstream-open-extid effective-extid)) + (zi2 (make-zstream :input-stack (list xi2)))) + (ensure-dtd) + (p/ext-subset zi2) + (when (and fresh-dtd-p + *cache-all-dtds* + *validate* + (not (standalone-p *ctx*))) + (setf (getdtd sysid *dtd-cache*) (dtd *ctx*)))))))) + (sax:end-dtd (handler *ctx*)) + (let ((dtd (dtd *ctx*))) + (sax:entity-resolver + (handler *ctx*) + (lambda (name handler) (resolve-entity name handler dtd))) + (sax::dtd (handler *ctx*) dtd)) + (list :DOCTYPE name extid)))) + +(defun report-cached-dtd (dtd) + (maphash (lambda (k v) + (report-entity (handler *ctx*) :general k (cdr v))) + (dtd-gentities dtd)) + (maphash (lambda (k v) + (report-entity (handler *ctx*) :parameter k (cdr v))) + (dtd-pentities dtd)) + (maphash (lambda (k v) + (sax:notation-declaration + (handler *ctx*) + k + (if (extid-public v) + (normalize-public-id (extid-public v)) + nil) + (uri-rod (extid-system v)))) + (dtd-notations dtd))) + +(defun p/misc*-2 (input) + ;; Misc* + (while (member (peek-token input) '(:COMMENT :PI :S)) + (case (peek-token input) + (:COMMENT + (sax:comment (handler *ctx*) (nth-value 1 (peek-token input)))) + (:PI + (sax:processing-instruction + (handler *ctx*) + (car (nth-value 1 (peek-token input))) + (cdr (nth-value 1 (peek-token input)))))) + (consume-token input))) + +(defun p/document + (input handler + &key validate dtd root entity-resolver disallow-internal-subset + (recode t)) + ;; check types of user-supplied arguments for better error messages: + (check-type validate boolean) + (check-type recode boolean) + (check-type dtd (or null extid)) + (check-type root (or null rod)) + (check-type entity-resolver (or null function symbol)) + (check-type disallow-internal-subset boolean) + #+rune-is-integer + (when recode + (setf handler (make-recoder handler #'rod-to-utf8-string))) + (let ((*ctx* + (make-context :handler handler + :main-zstream input + :entity-resolver entity-resolver + :disallow-internal-subset disallow-internal-subset)) + (*validate* validate)) + (sax:start-document handler) + ;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc* + ;; Misc ::= Comment | PI | S + ;; xmldecl::='<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' + ;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) + ;; + ;; we will use the attribute-value parser for the xml decl. + (let ((*data-behaviour* :DTD)) + ;; optional XMLDecl? + (cond ((eq (peek-token input) :xml-decl) + (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input)))))) + (setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes)) + (setup-encoding input hd)) + (read-token input))) + (set-full-speed input) + ;; Misc* + (p/misc*-2 input) + ;; (doctypedecl Misc*)? + (cond + ((eq (peek-token input) :<!DOCTYPE) + (p/doctype-decl input dtd) + (p/misc*-2 input)) + (dtd + (let ((dummy (string->xstream "<!DOCTYPE dummy>"))) + (setf (xstream-name dummy) + (make-stream-name + :entity-name "dummy doctype" + :entity-kind :main + :uri (zstream-base-sysid input))) + (p/doctype-decl (make-zstream :input-stack (list dummy)) dtd))) + ((and validate (not dtd)) + (validity-error "invalid document: no doctype"))) + (ensure-dtd) + ;; Override expected root element if asked to + (when root + (setf (model-stack *ctx*) (list (make-root-model root)))) + ;; element + (let ((*data-behaviour* :DOC)) + (p/element input)) + ;; optional Misc* + (p/misc*-2 input) + (unless (eq (peek-token input) :eof) + (wf-error input "Garbage at end of document.")) + (when *validate* + (maphash (lambda (k v) + (unless v + (validity-error "(11) IDREF: ~S not defined" (rod-string k)))) + (id-table *ctx*)) + + (dolist (name (referenced-notations *ctx*)) + (unless (find-notation name (dtd *ctx*)) + (validity-error "(23) Notation Declared: ~S" (rod-string name))))) + (sax:end-document handler)))) + +(defun p/element (input) + (multiple-value-bind (cat sem) (read-token input) + (case cat + ((:stag :ztag)) + (:eof (eox input)) + (t (wf-error input "element expected"))) + (destructuring-bind (&optional name &rest raw-attrs) sem + (validate-start-element *ctx* name) + (let* ((attrs + (process-attributes *ctx* name (build-attribute-list raw-attrs))) + (*namespace-bindings* *namespace-bindings*) + new-namespaces) + (when sax:*namespace-processing* + (setf new-namespaces (declare-namespaces attrs)) + (mapc #'set-attribute-namespace attrs)) + (multiple-value-bind (uri prefix local-name) + (if sax:*namespace-processing* + (decode-qname name) + (values nil nil nil)) + (declare (ignore prefix)) + (check-attribute-uniqueness attrs) + (unless (or sax:*include-xmlns-attributes* + (null sax:*namespace-processing*)) + (setf attrs + (remove-if (compose #'xmlns-attr-p #'sax:attribute-qname) + attrs))) + (cond + ((eq cat :ztag) + (sax:start-element (handler *ctx*) uri local-name name attrs) + (sax:end-element (handler *ctx*) uri local-name name)) + + ((eq cat :stag) + (sax:start-element (handler *ctx*) uri local-name name attrs) + (p/content input) + (multiple-value-bind (cat2 sem2) (read-token input) + (unless (and (eq cat2 :etag) + (eq (car sem2) name)) + (wf-error input "Bad nesting. ~S / ~S" + (mu name) + (mu (cons cat2 sem2)))) + (when (cdr sem2) + (wf-error input "no attributes allowed in end tag"))) + (sax:end-element (handler *ctx*) uri local-name name)) + + (t + (wf-error input "Expecting element, got ~S." cat)))) + (undeclare-namespaces new-namespaces)) + (validate-end-element *ctx* name)))) + +(defun p/content (input) + ;; [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* + (multiple-value-bind (cat sem) (peek-token input) + (case cat + ((:stag :ztag) + (p/element input) + (p/content input)) + ((:CDATA) + (consume-token input) + (when (search #"]]>" sem) + (wf-error input "']]>' not allowed in CharData")) + (validate-characters *ctx* sem) + (sax:characters (handler *ctx*) sem) + (p/content input)) + ((:ENTITY-REF) + (let ((name sem)) + (consume-token input) + (append + (recurse-on-entity input name :general + (lambda (input) + (prog1 + (etypecase (checked-get-entdef name :general) + (internal-entdef (p/content input)) + (external-entdef (p/ext-parsed-ent input))) + (unless (eq (peek-token input) :eof) + (wf-error input "Trailing garbage. - ~S" + (peek-token input)))))) + (p/content input)))) + ((:<![) + (consume-token input) + (cons + (let ((input (car (zstream-input-stack input)))) + (unless (and (rune= #/C (read-rune input)) + (rune= #/D (read-rune input)) + (rune= #/A (read-rune input)) + (rune= #/T (read-rune input)) + (rune= #/A (read-rune input)) + (rune= #/[ (read-rune input))) + (wf-error input "After '<![', 'CDATA[' is expected.")) + (validate-characters *ctx* #"hack") ;anything other than whitespace + (sax:start-cdata (handler *ctx*)) + (sax:characters (handler *ctx*) (read-cdata-sect input)) + (sax:end-cdata (handler *ctx*))) + (p/content input))) + ((:PI) + (consume-token input) + (sax:processing-instruction (handler *ctx*) (car sem) (cdr sem)) + (p/content input)) + ((:COMMENT) + (consume-token input) + (sax:comment (handler *ctx*) sem) + (p/content input)) + (otherwise + nil)))) + +;; [78] extParsedEnt ::= TextDecl? contentw +;; [79] extPE ::= TextDecl? extSubsetDecl + +(defstruct xml-header + version + encoding + (standalone-p nil)) + +(defun p/ext-parsed-ent (input) + ;; [78] extParsedEnt ::= '<?xml' VersionInfo? EncodingDecl S? '?>' content + (when (eq (peek-token input) :xml-decl) + (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input)))))) + (setup-encoding input hd)) + (consume-token input)) + (set-full-speed input) + (p/content input)) + +(defun parse-xml-decl (content) + (let* ((res (make-xml-header)) + (i (make-rod-xstream content)) + (z (make-zstream :input-stack (list i))) + (atts (read-attribute-list z i t))) + (unless (eq (peek-rune i) :eof) + (wf-error i "Garbage at end of XMLDecl.")) + ;; versioninfo muss da sein + ;; dann ? encodingdecl + ;; dann ? sddecl + ;; dann ende + (unless (eq (caar atts) (intern-name '#.(string-rod "version"))) + (wf-error i "XMLDecl needs version.")) + (unless (and (>= (length (cdar atts)) 1) + (every (lambda (x) + (or (rune<= #/a x #/z) + (rune<= #/A x #/Z) + (rune<= #/0 x #/9) + (rune= x #/_) + (rune= x #/.) + (rune= x #/:) + (rune= x #/-))) + (cdar atts))) + (wf-error i"Bad XML version number: ~S." (rod-string (cdar atts)))) + (setf (xml-header-version res) (rod-string (cdar atts))) + (pop atts) + (when (eq (caar atts) (intern-name '#.(string-rod "encoding"))) + (unless (and (>= (length (cdar atts)) 1) + (every (lambda (x) + (or (rune<= #/a x #/z) + (rune<= #/A x #/Z) + (rune<= #/0 x #/9) + (rune= x #/_) + (rune= x #/.) + (rune= x #/-))) + (cdar atts)) + ((lambda (x) + (or (rune<= #/a x #/z) + (rune<= #/A x #/Z))) + (aref (cdar atts) 0))) + (wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts)))) + (setf (xml-header-encoding res) (rod-string (cdar atts))) + (pop atts)) + (when (eq (caar atts) (intern-name '#.(string-rod "standalone"))) + (unless (or (rod= (cdar atts) '#.(string-rod "yes")) + (rod= (cdar atts) '#.(string-rod "no"))) + (wf-error i "XMLDecl's 'standalone' attribute must be exactly "yes" or "no" and not ~S." + (rod-string (cdar atts)))) + (setf (xml-header-standalone-p res) + (if (rod-equal '#.(string-rod "yes") (cdar atts)) + :yes + :no)) + (pop atts)) + (when atts + (wf-error i "Garbage in XMLDecl: ~A" (rod-string content))) + res)) + +(defun parse-text-decl (content) + (let* ((res (make-xml-header)) + (i (make-rod-xstream content)) + (z (make-zstream :input-stack (list i))) + (atts (read-attribute-list z i t))) + (unless (eq (peek-rune i) :eof) + (wf-error i "Garbage at end of TextDecl")) + ;; versioninfo optional + ;; encodingdecl muss da sein + ;; dann ende + (when (eq (caar atts) (intern-name '#.(string-rod "version"))) + (unless (and (>= (length (cdar atts)) 1) + (every (lambda (x) + (or (rune<= #/a x #/z) + (rune<= #/A x #/Z) + (rune<= #/0 x #/9) + (rune= x #/_) + (rune= x #/.) + (rune= x #/:) + (rune= x #/-))) + (cdar atts))) + (wf-error i "Bad XML version number: ~S." (rod-string (cdar atts)))) + (setf (xml-header-version res) (rod-string (cdar atts))) + (pop atts)) + (unless (eq (caar atts) (intern-name '#.(string-rod "encoding"))) + (wf-error i "TextDecl needs encoding.")) + (unless (and (>= (length (cdar atts)) 1) + (every (lambda (x) + (or (rune<= #/a x #/z) + (rune<= #/A x #/Z) + (rune<= #/0 x #/9) + (rune= x #/_) + (rune= x #/.) + (rune= x #/-))) + (cdar atts)) + ((lambda (x) + (or (rune<= #/a x #/z) + (rune<= #/A x #/Z) + (rune<= #/0 x #/9))) + (aref (cdar atts) 0))) + (wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts)))) + (setf (xml-header-encoding res) (rod-string (cdar atts))) + (pop atts) + (when atts + (wf-error i "Garbage in TextDecl: ~A" (rod-string content))) + res)) + +;;;; --------------------------------------------------------------------------- +;;;; mu +;;;; + +(defun mu (x) + (cond ((stringp x) x) + ((vectorp x) (rod-string x)) + ((consp x) + (cons (mu (car x)) (mu (cdr x)))) + (x))) + +;;;; --------------------------------------------------------------------------- +;;;; User interface ;;;; + +(defun specific-or (component &optional (alternative nil)) + (if (eq component :unspecific) + alternative + component)) + +(defun string-or (str &optional (alternative nil)) + (if (zerop (length str)) + alternative + str)) + +(defun make-uri (&rest initargs &key path query &allow-other-keys) + (apply #'make-instance + 'puri:uri + :path (and path (escape-path path)) + :query (and query (escape-query query)) + initargs)) + +(defun escape-path (list) + (puri::render-parsed-path list t)) + +(defun escape-query (pairs) + (flet ((escape (str) + (puri::encode-escaped-encoding str puri::*reserved-characters* t))) + (let ((first t)) + (with-output-to-string (s) + (dolist (pair pairs) + (if first + (setf first nil) + (write-char #& s)) + (write-string (escape (car pair)) s) + (write-char #= s) + (write-string (escape (cdr pair)) s)))))) + +(defun uri-parsed-query (uri) + (flet ((unescape (str) + (puri::decode-escaped-encoding str t puri::*reserved-characters*))) + (let ((str (puri:uri-query uri))) + (cond + (str + (let ((pairs '())) + (dolist (s (split-sequence-if (lambda (x) (eql x #&)) str)) + (destructuring-bind (name value) + (split-sequence-if (lambda (x) (eql x #=)) s) + (push (cons (unescape name) (unescape value)) pairs))) + (reverse pairs))) + (t + nil))))) + +(defun query-value (name alist) + (cdr (assoc name alist :test #'equal))) + +(defun pathname-to-uri (pathname) + (let ((path + (append (pathname-directory pathname) + (list + (if (specific-or (pathname-type pathname)) + (concatenate 'string + (pathname-name pathname) + "." + (pathname-type pathname)) + (pathname-name pathname)))))) + (if (eq (car path) :relative) + (make-uri :path path) + (make-uri :scheme :file + :host (concatenate 'string + (string-or (host-namestring pathname)) + "+" + (specific-or (pathname-device pathname))) + :path path)))) + +(defun parse-name.type (str) + (if str + (let ((i (position #. str :from-end t))) + (if i + (values (subseq str 0 i) (subseq str (1+ i))) + (values str nil))) + (values nil nil))) + +(defun uri-to-pathname (uri) + (let ((scheme (puri:uri-scheme uri)) + (path (puri:uri-parsed-path uri))) + (unless (member scheme '(nil :file)) + (error 'xml-parse-error + :format-control "URI scheme ~S not supported" + :format-arguments (list scheme))) + (if (eq (car path) :relative) + (multiple-value-bind (name type) + (parse-name.type (car (last path))) + (make-pathname :directory (butlast path) + :name name + :type type)) + (multiple-value-bind (name type) + (parse-name.type (car (last (cdr path)))) + (destructuring-bind (host device) + (split-sequence-if (lambda (x) (eql x #+)) + (or (puri:uri-host uri) "+")) + (make-pathname :host (string-or host) + :device (string-or device) + :directory (cons :absolute (butlast (cdr path))) + :name name + :type type)))))) + +(defun parse-xstream (xstream handler &rest args) + (let ((*ctx* nil)) + (handler-case + (let ((zstream (make-zstream :input-stack (list xstream)))) + (peek-rune xstream) + (with-scratch-pads () + (apply #'p/document zstream handler args))) + (runes-encoding:encoding-error (c) + (wf-error xstream "~A" c))))) + +(defun parse-file (filename handler &rest args) + (with-open-xfile (input filename) + (setf (xstream-name input) + (make-stream-name + :entity-name "main document" + :entity-kind :main + :uri (pathname-to-uri filename))) + (apply #'parse-xstream input handler args))) + +(defun resolve-synonym-stream (stream) + (while (typep stream 'synonym-stream) + (setf stream (symbol-value (synonym-stream-symbol stream)))) + stream) + +(defun safe-stream-sysid (stream) + (if (and (typep (resolve-synonym-stream stream) 'file-stream) + ;; ignore-errors, because sb-bsd-sockets creates instances of + ;; FILE-STREAMs that aren't + (ignore-errors (pathname stream))) + (pathname-to-uri (pathname stream)) + nil)) + +(defun parse-stream (stream handler &rest args) + (let ((xstream + (make-xstream + stream + :name (make-stream-name + :entity-name "main document" + :entity-kind :main + :uri (safe-stream-sysid stream)) + :initial-speed 1))) + (apply #'parse-xstream xstream handler args))) + +(defun parse-dtd-file (filename &optional handler) + (with-open-file (s filename :element-type '(unsigned-byte 8)) + (parse-dtd-stream s handler))) + +(defun parse-dtd-stream (stream &optional handler) + (let ((input (make-xstream stream))) + (setf (xstream-name input) + (make-stream-name + :entity-name "dtd" + :entity-kind :main + :uri (safe-stream-sysid stream))) + (let ((zstream (make-zstream :input-stack (list input))) + (*ctx* (make-context :handler handler)) + (*validate* t) + (*data-behaviour* :DTD)) + (with-scratch-pads () + (ensure-dtd) + (peek-rune input) + (p/ext-subset zstream) + (dtd *ctx*))))) + +(defun parse-rod (string handler &rest args) + (apply #'parse-xstream (string->xstream string) handler args)) + +(defun string->xstream (string) + (make-rod-xstream (string-rod string))) + +(defclass octet-input-stream + (trivial-gray-stream-mixin fundamental-binary-input-stream) + ((octets :initarg :octets) + (pos :initform 0))) + +(defmethod close ((stream octet-input-stream) &key abort) + (declare (ignore abort)) + (open-stream-p stream)) + +(defmethod stream-read-byte ((stream octet-input-stream)) + (with-slots (octets pos) stream + (if (>= pos (length octets)) + :eof + (prog1 + (elt octets pos) + (incf pos))))) + +(defmethod stream-read-sequence + ((stream octet-input-stream) sequence start end &key &allow-other-keys) + (with-slots (octets pos) stream + (let* ((length (min (- end start) (- (length octets) pos))) + (end1 (+ start length)) + (end2 (+ pos length))) + (replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2) + (setf pos end2) + end1))) + +(defun make-octet-input-stream (octets) + (make-instance 'octet-input-stream :octets octets)) + +(defun parse-octets (octets handler &rest args) + (apply #'parse-stream (make-octet-input-stream octets) handler args)) + +;;;; + +(defun zstream-push (new-xstream zstream) + (cond ((find-if (lambda (x) + (and (xstream-p x) + (eql (stream-name-entity-name (xstream-name x)) + (stream-name-entity-name (xstream-name new-xstream))) + (eql (stream-name-entity-kind (xstream-name x)) + (stream-name-entity-kind (xstream-name new-xstream))))) + (zstream-input-stack zstream)) + (wf-error zstream "Infinite recursion."))) + (push new-xstream (zstream-input-stack zstream)) + zstream) + +(defun recurse-on-entity (zstream name kind continuation &optional internalp) + (assert (not (zstream-token-category zstream))) + (call-with-entity-expansion-as-stream + zstream + (lambda (new-xstream) + (push :stop (zstream-input-stack zstream)) + (zstream-push new-xstream zstream) + (prog1 + (funcall continuation zstream) + (assert (eq (peek-token zstream) :eof)) + (assert (eq (pop (zstream-input-stack zstream)) new-xstream)) + (close-xstream new-xstream) + (assert (eq (pop (zstream-input-stack zstream)) :stop)) + (setf (zstream-token-category zstream) nil) + '(consume-token zstream)) ) + name + kind + internalp)) + +#|| +(defmacro read-data-until* ((predicate input res res-start res-end) &body body) + ;; fast variant -- for now disabled for no apparent reason + ;; -> res, res-start, res-end + `(let* ((rptr (xstream-read-ptr ,input)) + (p0 rptr) + (fptr (xstream-fill-ptr ,input)) + (buf (xstream-buffer ,input)) + ,res ,res-start ,res-end) + (declare (type fixnum rptr fptr p0) + (type (simple-array read-element (*)) buf)) + (loop + (cond ((%= rptr fptr) + ;; underflow -- hmm inject the scratch-pad with what we + ;; read and continue, while using read-rune and collecting + ;; d.h. besser wäre hier auch while-reading zu benutzen. + (setf (xstream-read-ptr ,input) rptr) + (multiple-value-setq (,res ,res-start ,res-end) + (with-rune-collector/raw (collect) + (do ((i p0 (%+ i 1))) + ((%= i rptr)) + (collect (%rune buf i))) + (let (c) + (loop + (cond ((%= rptr fptr) + (setf (xstream-read-ptr ,input) rptr) + (setf c (peek-rune input)) + (cond ((eq c :eof) + (return))) + (setf rptr (xstream-read-ptr ,input) + fptr (xstream-fill-ptr ,input) + buf (xstream-buffer ,input))) + (t + (setf c (%rune buf rptr)))) + (cond ((,predicate c) + ;; we stop + (setf (xstream-read-ptr ,input) rptr) + (return)) + (t + ;; we continue + (collect c) + (setf rptr (%+ rptr 1))) ))))) + (return)) + ((,predicate (%rune buf rptr)) + ;; we stop + (setf (xstream-read-ptr ,input) rptr) + (setf ,res buf ,res-start p0 ,res-end rptr) + (return) ) + (t + we continue + (sf rptr (%+ rptr 1))) )) + ,@body )) +||# + +(defmacro read-data-until* ((predicate input res res-start res-end) &body body) + "Read data from `input' until `predicate' applied to the read char + turns true. Then execute `body' with `res', `res-start', `res-end' + bound to denote a subsequence (of RUNEs) containing the read portion. + The rune upon which `predicate' turned true is neither consumed from + the stream, nor included in `res'. + + Keep the predicate short, this it may be included more than once into + the macro's expansion." + ;; + (let ((input-var (gensym)) + (collect (gensym)) + (c (gensym))) + `(LET ((,input-var ,input)) + (MULTIPLE-VALUE-BIND (,res ,res-start ,res-end) + (WITH-RUNE-COLLECTOR/RAW (,collect) + (LOOP + (LET ((,c (PEEK-RUNE ,input-var))) + (COND ((EQ ,c :EOF) + ;; xxx error message + (RETURN)) + ((FUNCALL ,predicate ,c) + (RETURN)) + (t + (,collect ,c) + (CONSUME-RUNE ,input-var)))))) + (LOCALLY + ,@body))))) + +(defun read-name-token (input) + (read-data-until* ((lambda (rune) + (declare (type rune rune)) + (not (name-rune-p rune))) + input + r rs re) + (intern-name r rs re))) + +(defun read-cdata (input) + (read-data-until* ((lambda (rune) + (declare (type rune rune)) + (when (and (%rune< rune #/U+0020) + (not (or (%rune= rune #/U+0009) + (%rune= rune #/U+000a) + (%rune= rune #/U+000d)))) + (wf-error input "code point invalid: ~A" rune)) + (or (%rune= rune #/<) (%rune= rune #/&))) + input + source start end) + (locally + (declare (type (simple-array rune (*)) source) + (type ufixnum start) + (type ufixnum end) + (optimize (speed 3) (safety 0))) + (let ((res (make-array (%- end start) :element-type 'rune))) + (declare (type (simple-array rune (*)) res)) + (let ((i (%- end start))) + (declare (type ufixnum i)) + (loop + (setf i (- i 1)) + (setf (%rune res i) (%rune source (the ufixnum (+ i start)))) + (when (= i 0) + (return)))) + res)))) + +(defun internal-entity-expansion (name) + (let ((def (get-entity-definition name :general (dtd *ctx*)))) + (unless def + (wf-error nil "Entity '~A' is not defined." (rod-string name))) + (unless (typep def 'internal-entdef) + (wf-error nil "Entity '~A' is not an internal entity." name)) + (or (entdef-expansion def) + (setf (entdef-expansion def) (find-internal-entity-expansion name))))) + +(defun find-internal-entity-expansion (name) + (let ((zinput (make-zstream))) + (with-rune-collector-3 (collect) + (labels ((muffle (input) + (let (c) + (loop + (setf c (read-rune input)) + (cond ((eq c :eof) + (return)) + ((rune= c #/&) + (setf c (peek-rune input)) + (cond ((eql c :eof) + (eox input)) + ((rune= c #/#) + (let ((c (read-character-reference input))) + (%put-unicode-char c collect))) + (t + (unless (name-start-rune-p c) + (wf-error zinput "Expecting name after &.")) + (let ((name (read-name-token input))) + (setf c (read-rune input)) + (check-rune input c #/;) + (recurse-on-entity + zinput name :general + (lambda (zinput) + (muffle (car (zstream-input-stack zinput))))))))) + ((rune= c #/<) + (wf-error zinput "unexpected #/<")) + ((space-rune-p c) + (collect #/space)) + ((not (data-rune-p c)) + (wf-error zinput "illegal char: ~S." c)) + (t + (collect c))))))) + (declare (dynamic-extent #'muffle)) + (recurse-on-entity + zinput name :general + (lambda (zinput) + (muffle (car (zstream-input-stack zinput))))) )))) + +(defun resolve-entity (name handler dtd) + (let ((*validate* nil)) + (if (get-entity-definition name :general dtd) + (let* ((*ctx* (make-context :handler handler :dtd dtd)) + (input (make-zstream)) + (*data-behaviour* :DOC)) + (with-scratch-pads () + (recurse-on-entity + input name :general + (lambda (input) + (prog1 + (etypecase (checked-get-entdef name :general) + (internal-entdef (p/content input)) + (external-entdef (p/ext-parsed-ent input))) + (unless (eq (peek-token input) :eof) + (wf-error input "Trailing garbage. - ~S" + (peek-token input)))))))) + nil))) + +(defun read-att-value-2 (input) + (let ((delim (read-rune input))) + (when (eql delim :eof) + (eox input)) + (unless (member delim '(#/" #/') :test #'eql) + (wf-error input + "Bad attribute value delimiter ~S, must be either #\" or #\'." + (rune-char delim))) + (with-rune-collector-4 (collect) + (loop + (let ((c (read-rune input))) + (cond ((eq c :eof) + (eox input "EOF")) + ((rune= c delim) + (return)) + ((rune= c #/<) + (wf-error input "'<' not allowed in attribute values")) + ((rune= #/& c) + (multiple-value-bind (kind sem) (read-entity-like input) + (ecase kind + (:CHARACTER-REFERENCE + (%put-unicode-char sem collect)) + (:ENTITY-REFERENCE + (let* ((exp (internal-entity-expansion sem)) + (n (length exp))) + (declare (type (simple-array rune (*)) exp)) + (do ((i 0 (%+ i 1))) + ((%= i n)) + (collect (%rune exp i)))))))) + ((space-rune-p c) + (collect #/u+0020)) + (t + (collect c)))))))) + +;;;;;;;;;;;;;;;;; + +;;; Namespace stuff + +;; We already know that name is part of a valid XML name, so all we +;; have to check is that the first rune is a name-start-rune and that +;; there is not colon in it. +(defun nc-name-p (name) + (and (plusp (length name)) + (name-start-rune-p (rune name 0)) + (notany #'(lambda (rune) (rune= #/: rune)) name))) + +(defun split-qname (qname) + (declare (type runes:simple-rod qname)) + (let ((pos (position #/: qname))) + (if pos + (let ((prefix (subseq qname 0 pos)) + (local-name (subseq qname (1+ pos)))) + (when (zerop pos) + (wf-error nil "empty namespace prefix")) + (if (nc-name-p local-name) + (values prefix local-name) + (wf-error nil "~S is not a valid NcName." + (rod-string local-name)))) + (values () qname)))) + +(defun decode-qname (qname) + "decode-qname name => namespace-uri, prefix, local-name" + (declare (type runes:simple-rod qname)) + (multiple-value-bind (prefix local-name) (split-qname qname) + (let ((uri (find-namespace-binding prefix))) + (if uri + (values uri prefix local-name) + (values nil nil qname))))) + + +(defun find-namespace-binding (prefix) + (cdr (or (assoc (or prefix #"") *namespace-bindings* :test #'rod=) + (wf-error nil "Undeclared namespace prefix: ~A" (rod-string prefix))))) + +;; FIXME: Should probably be refactored by adding :start and :end to rod=/rod-equal +(defun rod-starts-with (prefix rod) + (and (<= (length prefix) (length rod)) + (dotimes (i (length prefix) t) + (unless (rune= (rune prefix i) (rune rod i)) + (return nil))))) + +(defun xmlns-attr-p (attr-name) + (rod-starts-with #.(string-rod "xmlns") attr-name)) + +(defun attrname->prefix (attrname) + (if (< 5 (length attrname)) + (subseq attrname 6) + nil)) + +(defun find-namespace-declarations (attributes) + (loop + for attribute in attributes + for qname = (sax:attribute-qname attribute) + when (xmlns-attr-p qname) + collect (cons (attrname->prefix qname) (sax:attribute-value attribute)))) + +(defun declare-namespaces (attributes) + (let ((ns-decls (find-namespace-declarations attributes))) + (dolist (ns-decl ns-decls) + ;; check some namespace validity constraints + (let ((prefix (car ns-decl)) + (uri (cdr ns-decl))) + (cond + ((and (rod= prefix #"xml") + (not (rod= uri #"http://www.w3.org/XML/1998/namespace"))) + (wf-error nil + "Attempt to rebind the prefix "xml" to ~S." (mu uri))) + ((and (rod= uri #"http://www.w3.org/XML/1998/namespace") + (not (rod= prefix #"xml"))) + (wf-error nil + "The namespace ~ + URI "http://www.w3.org/XML/1998/namespace%5C" may not ~ + be bound to the prefix ~S, only "xml" is legal." + (mu prefix))) + ((and (rod= prefix #"xmlns") + (rod= uri #"http://www.w3.org/2000/xmlns/")) + (wf-error nil + "Attempt to bind the prefix "xmlns" to its predefined ~ + URI "http://www.w3.org/2000/xmlns/%5C", which is ~ + forbidden for no good reason.")) + ((rod= prefix #"xmlns") + (wf-error nil + "Attempt to bind the prefix "xmlns" to the URI ~S, ~ + but it may not be declared." (mu uri))) + ((rod= uri #"http://www.w3.org/2000/xmlns/") + (wf-error nil + "The namespace URI "http://www.w3.org/2000/xmlns/%5C" may ~ + not be bound to prefix ~S (or any other)." (mu prefix))) + ((and (rod= uri #"") prefix) + (wf-error nil + "Only the default namespace (the one without a prefix) ~ + may be bound to an empty namespace URI, thus ~ + undeclaring it.")) + (t + (push (cons prefix (if (rod= #"" uri) nil uri)) + *namespace-bindings*) + (sax:start-prefix-mapping (handler *ctx*) + (car ns-decl) + (cdr ns-decl)))))) + ns-decls)) + +(defun undeclare-namespaces (ns-decls) + (dolist (ns-decl ns-decls) + (sax:end-prefix-mapping (handler *ctx*) (car ns-decl)))) + +(defun build-attribute-list (attr-alist) + ;; fixme: if there is a reason this function reverses attribute order, + ;; it should be documented. + (let (attributes) + (dolist (pair attr-alist) + (push (sax:make-attribute :qname (car pair) + :value (cdr pair) + :specified-p t) + attributes)) + attributes)) + +(defun check-attribute-uniqueness (attributes) + ;; 5.3 Uniqueness of Attributes + ;; In XML documents conforming to [the xmlns] specification, no + ;; tag may contain two attributes which: + ;; 1. have identical names, or + ;; 2. have qualified names with the same local part and with + ;; prefixes which have been bound to namespace names that are + ;; identical. + ;; + ;; 1. is checked by read-tag-2, so we only deal with 2 here + (loop for (attr-1 . rest) on attributes do + (when (and (sax:attribute-namespace-uri attr-1) + (find-if (lambda (attr-2) + (and (rod= (sax:attribute-namespace-uri attr-1) + (sax:attribute-namespace-uri attr-2)) + (rod= (sax:attribute-local-name attr-1) + (sax:attribute-local-name attr-2)))) + rest)) + (wf-error nil + "Multiple definitions of attribute ~S in namespace ~S." + (mu (sax:attribute-local-name attr-1)) + (mu (sax:attribute-namespace-uri attr-1)))))) + +(defun set-attribute-namespace (attribute) + (let ((qname (sax:attribute-qname attribute))) + (if (and sax:*use-xmlns-namespace* (rod= qname #"xmlns")) + (setf (sax:attribute-namespace-uri attribute) + #"http://www.w3.org/2000/xmlns/") + (multiple-value-bind (prefix local-name) (split-qname qname) + (declare (ignorable local-name)) + (when (and prefix ;; default namespace doesn't apply to attributes + (or (not (rod= #"xmlns" prefix)) + sax:*use-xmlns-namespace*)) + (multiple-value-bind (uri prefix local-name) + (decode-qname qname) + (declare (ignore prefix)) + (setf (sax:attribute-namespace-uri attribute) uri) + (setf (sax:attribute-local-name attribute) local-name))))))) + +;;;;;;;;;;;;;;;;; + +;; System Identifier Protocol + +;; A system identifier is an object obeying to the system identifier +;; protocol. Often something like an URL or a pathname. + +;; OPEN-SYS-ID sys-id [generic function] +;; +;; Opens the resource associated with the system identifier `sys-id' +;; for reading and returns a stream. For now it is expected, that the +;; stream is an octet stream (one of element type (unsigned-byte 8)). +;; +;; More precisely: The returned object only has to obey to the xstream +;; controller protocol. (That is it has to provide implementations for +;; READ-OCTETS and XSTREAM-CONTROLLER-CLOSE). + +;; MERGE-SYS-ID sys-id base [generic function] +;; +;; Merges two system identifiers. That is resolve `sys-id' relative to +;; `base' yielding an absolute system identifier suitable for +;; OPEN-SYS-ID. + + +;;;;;;;;;;;;;;;;; +;;; SAX validation handler + +(defclass validator () + ((context :initarg :context :accessor context) + (cdatap :initform nil :accessor cdatap))) + +(defun make-validator (dtd root) + (make-instance 'validator + :context (make-context + :handler nil + :dtd dtd + :model-stack (list (make-root-model root))))) + +(macrolet ((with-context ((validator) &body body) + `(let ((*ctx* (context ,validator)) + (*validate* t)) + (with-scratch-pads () ;nicht schoen + ,@body)))) + (defmethod sax:start-element ((handler validator) uri lname qname attributes) + uri lname + (with-context (handler) + (validate-start-element *ctx* qname) + (process-attributes *ctx* qname attributes))) + + (defmethod sax:start-cdata ((handler validator)) + (setf (cdatap handler) t)) + + (defmethod sax:characters ((handler validator) data) + (with-context (handler) + (validate-characters *ctx* (if (cdatap handler) #"hack" data)))) + + (defmethod sax:end-cdata ((handler validator)) + (setf (cdatap handler) nil)) + + (defmethod sax:end-element ((handler validator) uri lname qname) + uri lname + (with-context (handler) + (validate-end-element *ctx* qname))))
Added: vendor/cxml/xmlns-normalizer.lisp =================================================================== --- vendor/cxml/xmlns-normalizer.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/xmlns-normalizer.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,131 @@ +;;;; xmlns-normalizer.lisp -- DOM 3-style namespace normalization +;;;; +;;;; This file is part of the CXML parser, released under Lisp-LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Copyright (c) 2005 David Lichteblau + +;;;; Hier eine Variante des reichlich furchtbaren Algorithmus zur +;;;; Namespace-Normalisierung aus DOM 3 Core.[1] +;;;; +;;;; Gebraucht wir die Sache, weil Element- und Attributknoten in DOM +;;;; zwar ein Prefix-Attribut speichern, massgeblich fuer ihren Namespace +;;;; aber nur die URI sein soll. Und eine Anpassung der zugehoerigen +;;;; xmlns-Attribute findet bei Veraenderungen im DOM-Baum nicht statt, +;;;; bzw. wird dem Nutzer ueberlassen. +;;;; +;;;; Daher muss letztlich spaetestens beim Serialisieren eine +;;;; Namespace-Deklaration fuer die angegebene URI nachgetragen und das +;;;; Praefix ggf. umbenannt werden, damit am Ende doch etwas +;;;; Namespace-konformes heraus kommt. +;;;; +;;;; Und das nennen sie dann Namespace-Support. +;;;; +;;;; [1] http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/namespaces-algorithm... + +(in-package :cxml) + +(defclass namespace-normalizer (sax-proxy) + ((xmlns-stack :initarg :xmlns-stack :accessor xmlns-stack))) + +(defvar *xmlns-namespace* #"http://www.w3.org/2000/xmlns/") + +(defun make-namespace-normalizer (chained-handler) + (make-instance 'namespace-normalizer + :xmlns-stack (list (mapcar (lambda (cons) + (make-xmlns-attribute (car cons) (cdr cons))) + *namespace-bindings*)) + :chained-handler chained-handler)) + +(defun normalizer-find-prefix (handler prefix) + (block t + (dolist (bindings (xmlns-stack handler)) + (dolist (attribute bindings) + (when (rod= (sax:attribute-local-name attribute) prefix) + (return-from t attribute)))))) + +(defun normalizer-find-uri (handler uri) + (block t + (dolist (bindings (xmlns-stack handler)) + (dolist (attribute bindings) + (when (and (rod= (sax:attribute-value attribute) uri) + ;; default-namespace interessiert uns nicht + (not (rod= (sax:attribute-qname attribute) #"xmlns"))) + (return-from t attribute)))))) + +(defun make-xmlns-attribute (prefix uri) + (if prefix + (sax:make-attribute + :qname (concatenate 'rod #"xmlns:" prefix) + :namespace-uri *xmlns-namespace* + :local-name prefix + :value uri) + (sax:make-attribute + :qname #"xmlns" + :namespace-uri *xmlns-namespace* + :local-name #"xmlns" + :value uri))) + +(defun rename-attribute (a new-prefix) + (setf (sax:attribute-qname a) + (concatenate 'rod new-prefix #":" (sax:attribute-local-name a)))) + +(defmethod sax:start-element + ((handler namespace-normalizer) uri lname qname attrs) + (declare (ignore qname)) + (when (null uri) + (setf uri #"")) + (let ((normal-attrs '())) + (push nil (xmlns-stack handler)) + (dolist (a attrs) + (if (rod= *xmlns-namespace* (sax:attribute-namespace-uri a)) + (push a (car (xmlns-stack handler))) + (push a normal-attrs))) + (flet ((push-namespace (prefix uri) + (let ((new (make-xmlns-attribute prefix uri))) + (push new (car (xmlns-stack handler))) + (push new attrs)))) + (multiple-value-bind (prefix local-name) (split-qname qname) + (setf lname local-name) + (let ((binding (normalizer-find-prefix handler prefix))) + (cond + ((null binding) + (unless (and (null prefix) (zerop (length uri))) + (push-namespace prefix uri))) + ((rod= (sax:attribute-value binding) uri)) + ((member binding (car (xmlns-stack handler))) + (setf (sax:attribute-value binding) uri)) + (t + (push-namespace prefix uri))))) + (dolist (a normal-attrs) + (let ((u (sax:attribute-namespace-uri a))) + (when u + (let* ((prefix (split-qname (sax:attribute-qname a))) + (prefix-binding + (when prefix + (normalizer-find-prefix handler prefix)))) + (when (or (null prefix-binding) + (not (rod= (sax:attribute-value prefix-binding) u))) + (let ((uri-binding (normalizer-find-uri handler u))) + (cond + (uri-binding + (rename-attribute + a + (sax:attribute-local-name uri-binding))) + ((null prefix-binding) + (push-namespace prefix u)) + (t + (loop + for i from 1 + for prefix = (rod (format nil "NS~D" i)) + unless (normalizer-find-prefix handler prefix) + do + (push-namespace prefix u) + (rename-attribute a prefix) + (return)))))))))))) + (sax:start-element (proxy-chained-handler handler) uri lname qname attrs)) + +(defmethod sax:end-element ((handler namespace-normalizer) uri lname qname) + (declare (ignore qname)) + (pop (xmlns-stack handler)) + (sax:end-element (proxy-chained-handler handler) (or uri #"") lname qname))
Added: vendor/cxml/xmls-compat.lisp =================================================================== --- vendor/cxml/xmls-compat.lisp 2006-02-18 09:24:24 UTC (rev 1844) +++ vendor/cxml/xmls-compat.lisp 2006-02-18 09:34:15 UTC (rev 1845) @@ -0,0 +1,159 @@ +;;;; xml-compat.lisp -- XMLS-compatible data structures +;;;; +;;;; This file is part of the CXML parser, released under Lisp-LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Developed 2004 for headcraft - http://headcraft.de/ +;;;; Copyright: David Lichteblau + +;;;; XXX Der namespace-Support in xmls kommt mir zweifelhaft vor. +;;;; Wir immitieren das soweit es gebraucht wurde bisher. + +(defpackage cxml-xmls + (:use :cl :runes) + (:export #:make-node #:node-name #:node-ns #:node-attrs #:node-children + #:make-xmls-builder #:map-node)) + +(in-package :cxml-xmls) + + +;;;; Knoten + +(defun make-node (&key name ns attrs children) + `(,(if ns (cons name ns) name) + ,attrs + ,@children)) + +(defun node-name (node) + (let ((car (car node))) + (if (consp car) + (car car) + car))) + +(defun (setf node-name) (newval node) + (let ((car (car node))) + (if (consp car) + (setf (car car) newval) + (setf (car node) newval)))) + +(defun node-ns (node) + (let ((car (car node))) + (if (consp car) + (cdr car) + nil))) + +(defun (setf node-ns) (newval node) + (let ((car (car node))) + (if (consp car) + (setf (cdr car) newval) + (setf (car node) (cons car newval))) + newval)) + +(defun node-attrs (node) + (cadr node)) + +(defun (setf node-attrs) (newval node) + (setf (cadr node) newval)) + +(defun node-children (node) + (cddr node)) + +(defun (setf node-children) (newval node) + (setf (cddr node) newval)) + + +;;;; SAX-Handler (Parser) + +(defclass xmls-builder () + ((element-stack :initform nil :accessor element-stack) + (root :initform nil :accessor root) + (include-default-values :initform t + :initarg :include-default-values + :accessor include-default-values))) + +(defun make-xmls-builder (&key (include-default-values t)) + (make-instance 'xmls-builder :include-default-values include-default-values)) + +(defmethod sax:end-document ((handler xmls-builder)) + (root handler)) + +(defmethod sax:start-element + ((handler xmls-builder) namespace-uri local-name qname attributes) + (declare (ignore namespace-uri)) + (setf local-name (or local-name qname)) + (let* ((attributes + (loop + for attr in attributes + when (or (sax:attribute-specified-p attr) + (include-default-values handler)) + collect + (list (sax:attribute-qname attr) + (sax:attribute-value attr)))) + (node (make-node :name local-name + :ns (let ((lq (length qname)) + (ll (length local-name))) + (if (eql lq ll) + nil + (subseq qname 0 (- lq ll 1)))) + :attrs attributes)) + (parent (car (element-stack handler)))) + (if parent + (push node (node-children parent)) + (setf (root handler) node)) + (push node (element-stack handler)))) + +(defmethod sax:end-element + ((handler xmls-builder) namespace-uri local-name qname) + (declare (ignore namespace-uri local-name qname)) + (let ((node (pop (element-stack handler)))) + (setf (node-children node) (reverse (node-children node))))) + +(defmethod sax:characters ((handler xmls-builder) data) + (let* ((parent (car (element-stack handler))) + (prev (car (node-children parent)))) + ;; Be careful to accept both rods and strings here, so that xmls can be + ;; used with strings even if cxml is configured to use octet string rods. + (if (typep prev '(or rod string)) + ;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer + ;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten + ;; erweitern, sonst ist das Dokument nicht normalisiert. + ;; (XXX Oder sollte man besser den Parser entsprechend aendern?) + (setf (car (node-children parent)) + (concatenate `(vector ,(array-element-type prev)) + prev + data)) + (push data (node-children parent))))) + + +;;;; SAX-Treiber (fuer Serialisierung) + +(defun map-node + (handler node + &key (include-xmlns-attributes sax:*include-xmlns-attributes*)) + (sax:start-document handler) + (labels ((walk (node) + (let* ((attlist + (compute-attributes node include-xmlns-attributes)) + (lname (rod (node-name node))) + (ns (rod (node-ns node))) + (qname (concatenate 'rod ns (rod ":") lname))) + ;; fixme: namespaces + (sax:start-element handler nil lname qname attlist) + (dolist (child (node-children node)) + (typecase child + (list (walk child)) + ((or string rod) (sax:characters handler (rod child))))) + (sax:end-element handler nil lname qname)))) + (walk node)) + (sax:end-document handler)) + +(defun compute-attributes (node xmlnsp) + (remove nil + (mapcar (lambda (a) + (destructuring-bind (name value) a + (if (or xmlnsp (not (cxml::xmlns-attr-p (rod name)))) + (sax:make-attribute :qname (rod name) + :value (rod value) + :specified-p t) + nil))) + (node-attrs node))))