o Add support for the Scieneer CL.
The Scieneer CL has a very fast stream implementation which is comparable in performance to the runes implementation so CXML on the SCL can use CL streams rather than xstreams and ystreams - enabled via the :x&y-streams-are-stream feature. This also enables support for a wider range of encodings.
The Scieneer CL also supports http and https URLs for pathnames so can open such external files without the need for an entity resolver.
Regards Douglas Crosher
o Add support for the Scieneer CL.
? runes/stream-scl.lisp Index: cxml.asd =================================================================== RCS file: /project/cxml/cvsroot/cxml/cxml.asd,v retrieving revision 1.18 diff -u -r1.18 cxml.asd --- cxml.asd 26 May 2007 21:55:57 -0000 1.18 +++ cxml.asd 13 Jun 2007 08:15:02 -0000 @@ -30,7 +30,7 @@ (:file "space-normalizer" :depends-on ("xml-parse")) (:file "catalog" :depends-on ("xml-parse")) (:file "sax-proxy" :depends-on ("xml-parse"))) - :depends-on (:runes :puri :trivial-gray-streams)) + :depends-on (:runes :puri #-scl :trivial-gray-streams))
(defclass utf8dom-file (closure-source-file) ((of)))
Index: runes.asd =================================================================== RCS file: /project/cxml/cvsroot/cxml/runes.asd,v retrieving revision 1.1 diff -u -r1.1 runes.asd --- runes.asd 26 May 2007 22:05:13 -0000 1.1 +++ runes.asd 13 Jun 2007 08:15:03 -0000 @@ -52,7 +52,9 @@ #+rune-is-character "characters") #+rune-is-integer (:file "utf8") (:file "syntax") - (:file "encodings") - (:file "encodings-data") - (:file "xstream") - (:file "ystream"))) + #-x&y-streams-are-stream (:file "encodings") + #-x&y-streams-are-stream (:file "encodings-data") + #-x&y-streams-are-stream (:file "xstream") + #-x&y-streams-are-stream (:file "ystream") + #+x&y-streams-are-stream (:file #+scl "stream-scl") + )) Index: xml/package.lisp =================================================================== RCS file: /project/cxml/cvsroot/cxml/xml/package.lisp,v retrieving revision 1.16 diff -u -r1.16 package.lisp --- xml/package.lisp 1 May 2007 20:07:00 -0000 1.16 +++ xml/package.lisp 13 Jun 2007 08:15:04 -0000 @@ -6,7 +6,7 @@ (in-package :cl-user)
(defpackage :cxml - (:use :cl :runes :runes-encoding :trivial-gray-streams) + (:use :cl :runes :runes-encoding #-scl :trivial-gray-streams) (:export ;; xstreams #:make-xstream Index: xml/unparse.lisp =================================================================== RCS file: /project/cxml/cvsroot/cxml/xml/unparse.lisp,v retrieving revision 1.13 diff -u -r1.13 unparse.lisp --- xml/unparse.lisp 1 May 2007 20:07:00 -0000 1.13 +++ xml/unparse.lisp 13 Jun 2007 08:15:04 -0000 @@ -621,14 +621,22 @@ data)
(defun rod-to-utf8-string (rod) + #-scl (let ((out (make-buffer :element-type 'character))) (runes-to-utf8/adjustable-string out rod (length rod)) - out)) + out) + #+scl + (ext:make-string-from-bytes (ext:make-bytes-from-string rod :utf8) + :iso-8859-1))
(defun utf8-string-to-rod (str) + #-scl (let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str)) (buffer (make-array (length bytes) :element-type '(unsigned-byte 16))) (n (decode-sequence :utf-8 bytes 0 (length bytes) buffer 0 0 nil)) (result (make-array n :element-type 'rune))) (map-into result #'code-rune buffer) - result)) + result) + #+scl + (let ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))) + (ext:make-string-from-bytes bytes :utf-8))) Index: xml/xml-parse.lisp =================================================================== RCS file: /project/cxml/cvsroot/cxml/xml/xml-parse.lisp,v retrieving revision 1.67 diff -u -r1.67 xml-parse.lisp --- xml/xml-parse.lisp 4 Mar 2007 21:04:13 -0000 1.67 +++ xml/xml-parse.lisp 13 Jun 2007 08:15:07 -0000 @@ -2957,6 +2973,7 @@ ;;;; --------------------------------------------------------------------------- ;;;; User interface ;;;;
+#-scl (defun specific-or (component &optional (alternative nil)) (if (eq component :unspecific) alternative @@ -2967,6 +2984,7 @@ alternative str))
+#-scl (defun make-uri (&rest initargs &key path query &allow-other-keys) (apply #'make-instance 'puri:uri @@ -2974,9 +2992,11 @@ :query (and query (escape-query query)) initargs))
+#-scl (defun escape-path (list) (puri::render-parsed-path list t))
+#-scl (defun escape-query (pairs) (flet ((escape (str) (puri::encode-escaped-encoding str puri::*reserved-characters* t))) @@ -2990,6 +3010,7 @@ (write-char #= s) (write-string (escape (cdr pair)) s))))))
+#-scl (defun uri-parsed-query (uri) (flet ((unescape (str) (puri::decode-escaped-encoding str t puri::*reserved-characters*))) @@ -3005,9 +3026,11 @@ (t nil)))))
+#-scl (defun query-value (name alist) (cdr (assoc name alist :test #'equal)))
+#-scl (defun pathname-to-uri (pathname) (let ((path (append (pathname-directory pathname) @@ -3027,6 +3050,11 @@ (specific-or (pathname-device pathname))) :path path))))
+#+scl +(defun pathname-to-uri (pathname) + (puri:parse-uri (namestring pathname))) + +#-scl (defun parse-name.type (str) (if str (let ((i (position #. str :from-end t))) @@ -3035,6 +3063,7 @@ (values str nil))) (values nil nil)))
+#-scl (defun uri-to-pathname (uri) (let ((scheme (puri:uri-scheme uri)) (path (puri:uri-parsed-path uri))) @@ -3058,11 +3087,17 @@ :directory (cons :absolute (butlast (cdr path))) :name name :type type)))))) +#+scl +(defun uri-to-pathname (uri) + (let ((pathname (puri:render-uri uri nil))) + (when (equalp (pathname-host pathname) "+") + (setf (slot-value pathname 'lisp::host) "localhost")) + pathname))
(defun parse-xstream (xstream handler &rest args) (let ((*ctx* nil)) (handler-case - (let ((zstream (make-zstream :input-stack (list xstream)))) + (with-zstream (zstream :input-stack (list xstream)) (peek-rune xstream) (with-scratch-pads () (apply #'p/document zstream handler args))) @@ -3129,10 +3164,10 @@ (unless (dtd *ctx*) (with-scratch-pads () (let ((*data-behaviour* :DTD)) - (let* ((xi2 (xstream-open-extid extid)) - (zi2 (make-zstream :input-stack (list xi2)))) - (ensure-dtd) - (p/ext-subset zi2))))) + (let ((xi2 (xstream-open-extid extid))) + (with-zstream (zi2 :input-stack (list xi2)) + (ensure-dtd) + (p/ext-subset zi2)))))) (sax:end-dtd handler) (let ((dtd (dtd *ctx*))) (sax:entity-resolver handler (lambda (n h) (resolve-entity n h dtd))) @@ -3193,15 +3228,18 @@ (defun string->xstream (string) (make-rod-xstream (string-rod string)))
+#-scl (defclass octet-input-stream (trivial-gray-stream-mixin fundamental-binary-input-stream) ((octets :initarg :octets) (pos :initform 0)))
+#-scl (defmethod close ((stream octet-input-stream) &key abort) (declare (ignore abort)) (open-stream-p stream))
+#-scl (defmethod stream-read-byte ((stream octet-input-stream)) (with-slots (octets pos) stream (if (>= pos (length octets)) @@ -3210,6 +3248,7 @@ (elt octets pos) (incf pos)))))
+#-scl (defmethod stream-read-sequence ((stream octet-input-stream) sequence start end &key &allow-other-keys) (with-slots (octets pos) stream @@ -3221,7 +3260,10 @@ end1)))
(defun make-octet-input-stream (octets) - (make-instance 'octet-input-stream :octets octets)) + #-scl + (make-instance 'octet-input-stream :octets octets) + #+scl + (ext:make-byte-input-stream octets))
(defun parse-octets (octets handler &rest args) (apply #'parse-stream (make-octet-input-stream octets) handler args))
;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*- ;;; --------------------------------------------------------------------------- ;;; Title: Fast streams ;;; Created: 1999-07-17 ;;; Author: Douglas Crosher ;;; License: Lisp-LGPL (See file COPYING for details). ;;; --------------------------------------------------------------------------- ;;; (c) copyright 2007 by Douglas Crosher
;;; This library is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Library General Public ;;; License as published by the Free Software Foundation; either ;;; version 2 of the License, or (at your option) any later version. ;;; ;;; This library is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Library General Public License for more details. ;;; ;;; You should have received a copy of the GNU Library General Public ;;; License along with this library; if not, write to the ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;;; Boston, MA 02111-1307 USA.
(in-package :runes)
;;; Optimization qualities spliced into the declarations of the rune
;;; primitives in this file via #.*fast*.  The DEFPARAMETER is wrapped in
;;; EVAL-WHEN so that the value already exists when the reader evaluates
;;; #.*fast* at compile time.
(eval-when (:compile-toplevel :load-toplevel :execute) (defparameter *fast* '(optimize (speed 3) (safety 3))))
;;; Define RUNES-ENCODING:ENCODING-ERROR as an alias for
;;; ext:character-conversion-error, the condition type that READ-RUNE in this
;;; file names when it rejects an illegal XML character.
(deftype runes-encoding:encoding-error () 'ext:character-conversion-error)
;;; xstream
;;; The stream classes are defined inside EVAL-WHEN so that they exist at
;;; compile time as well as at load time.
(eval-when (:compile-toplevel :load-toplevel :execute)
;;; XSTREAM: a character stream carrying the extra bookkeeping the rune
;;; primitives maintain:
;;;   NAME          - caller-supplied name, accessible via XSTREAM-NAME.
;;;   COLUMN        - current column, starting at 0.
;;;   LINE          - current line, starting at 1.
;;;   UNREAD-COLUMN - column of the previous line, saved by READ-RUNE when it
;;;                   reads a linefeed so UNREAD-RUNE can restore it.
(defclass xstream (ext:character-stream) ((name :initarg :name :initform nil :accessor xstream-name) (column :initarg :column :initform 0) (line :initarg :line :initform 1) (unread-column :initarg :unread-column :initform 0)))
;;; An end-of-line converting input stream that is also an XSTREAM; instances
;;; are produced by MAKE-EOL-CONVERSION-XSTREAM via CHANGE-CLASS.
(defclass eol-conversion-xstream (lisp::eol-conversion-input-stream xstream) ())
) ; eval-when
(defun make-eol-conversion-xstream (source-stream)
  "Return a character input stream reading from SOURCE-STREAM that converts
CR-LF pairs and lone CR characters into a single linefeed character.  The
result is an EOL-CONVERSION-XSTREAM, so it also carries the XSTREAM
line/column bookkeeping slots."
  (declare (type stream source-stream))
  ;; Build the SCL end-of-line conversion stream and re-class it in one step.
  (change-class (ext:make-eol-conversion-stream source-stream
                                                :input t
                                                :close-stream-p t)
                'eol-conversion-xstream))
;;; Return true if STREAM is an instance of the XSTREAM class.
(definline xstream-p (stream) (typep stream 'xstream))
;;; Close the xstream INPUT by calling CLOSE on it; returns whatever CLOSE
;;; returns.
(defun close-xstream (input) (close input))
;;; Read one rune from INPUT and keep the LINE, COLUMN and UNREAD-COLUMN
;;; slots of the stream up to date.
;;;
;;; Returns the character read, or :EOF at end of input.  Signals an
;;; ext:character-conversion-error for U+FFFE and U+FFFF.
(definline read-rune (input)
  (declare (type stream input)
           (inline read-char)
           #.*fast*)
  (let ((char (read-char input nil :eof)))
    (cond ((eq char :eof)
           ;; Nothing was consumed, so the position must not move.  Without
           ;; this guard every read at end of input bumped COLUMN by one.
           nil)
          ((member char '(#\UFFFE #\UFFFF))
           ;; These characters are illegal within XML documents.
           (simple-error 'ext:character-conversion-error
                         "~@<Illegal XML document character: ~S~:@>" char))
          ((eql char #\linefeed)
           ;; Remember the column we are leaving so UNREAD-RUNE can restore it.
           (setf (slot-value input 'unread-column) (slot-value input 'column))
           (setf (slot-value input 'column) 0)
           (incf (the kernel:index (slot-value input 'line))))
          (t
           (incf (the kernel:index (slot-value input 'column)))))
    char))
;;; Return the next rune of INPUT without consuming it, or :EOF at end of
;;; input.  The line/column slots are not touched.
(definline peek-rune (input) (declare (type stream input) (inline peek-char) #.*fast*) (peek-char nil input nil :eof))
;;; Read one rune from INPUT via READ-RUNE (so the position bookkeeping is
;;; updated) and discard it.  Always returns NIL.
(definline consume-rune (input) (declare (type stream input) (inline read-rune) #.*fast*) (read-rune input) nil)
;;; Push RUNE back onto INPUT and undo the position bookkeeping READ-RUNE did
;;; for it: unreading a linefeed restores the saved column and steps LINE
;;; back, anything else steps COLUMN back.  Always returns NIL.
(definline unread-rune (rune input)
  (declare (type stream input)
           (inline unread-char)
           #.*fast*)
  (unread-char rune input)
  (if (eql rune #\linefeed)
      (progn
        ;; SETF assigns its pairs sequentially, so COLUMN receives the saved
        ;; value before UNREAD-COLUMN is reset.
        (setf (slot-value input 'column) (slot-value input 'unread-column)
              (slot-value input 'unread-column) 0)
        (decf (the kernel:index (slot-value input 'line))))
      (decf (the kernel:index (slot-value input 'column))))
  nil)
;;; Ordinary-function entry point for READ-RUNE: reads one rune from INPUT
;;; and returns it (or :EOF).
(defun fread-rune (input) (read-rune input))
;;; Ordinary-function entry point for PEEK-RUNE: returns the next rune of
;;; INPUT without consuming it (or :EOF).
(defun fpeek-rune (input) (peek-rune input))
;;; Return the current position of INPUT as reported by FILE-POSITION.
(defun xstream-position (input) (file-position input))
;;; Return ENCODING unchanged; no lookup is performed here.
;;; NOTE(review): presumably encoding names are used directly as external
;;; formats on the SCL -- confirm against callers.
(defun runes-encoding:find-encoding (encoding) encoding)
(defun make-xstream (os-stream &key name (speed 8192) (initial-speed 1) (initial-encoding :guess))
  "Return an xstream named NAME that reads characters from OS-STREAM.

OS-STREAM is wrapped in an SCL XML character conversion stream, which is in
turn wrapped in an end-of-line converting xstream.  SPEED is accepted but
ignored.  INITIAL-SPEED must be 1 and INITIAL-ENCODING must be :GUESS; any
other value fails an assertion."
  (declare (ignore speed))
  (assert (eql initial-speed 1))
  (assert (eq initial-encoding :guess))
  (let ((xstream (make-eol-conversion-xstream
                  (ext:make-xml-character-conversion-stream os-stream
                                                            :input t
                                                            :close-stream-p t))))
    (setf (xstream-name xstream) name)
    xstream))
;;; A string input stream that is also an XSTREAM; instances are produced by
;;; MAKE-ROD-XSTREAM via CHANGE-CLASS.
(defclass xstream-string-input-stream (lisp::string-input-stream xstream) ())
(defun make-rod-xstream (string &key name)
  "Return an xstream named NAME that reads its runes from STRING."
  (declare (type string string))
  ;; Create a plain string input stream and re-class it so that it gains the
  ;; XSTREAM slots; NAME is passed as an initarg to CHANGE-CLASS.
  (change-class (make-string-input-stream string)
                'xstream-string-input-stream
                :name name))
;;; The stream is already at 'full speed', so no state is changed: just
;;; return the size of STREAM's input buffer (the length of the value of
;;; ext:stream-in-buffer).
(defun set-to-full-speed (stream) (length (ext:stream-in-buffer stream)))
;;; Return the length of STREAM's input buffer as given by
;;; ext:stream-in-buffer; this is the same value SET-TO-FULL-SPEED returns.
(defun xstream-speed (stream) (length (ext:stream-in-buffer stream)))
;;; Return the current line number of STREAM (the LINE slot, which starts at
;;; 1 and is maintained by READ-RUNE and UNREAD-RUNE).
(defun xstream-line-number (stream) (slot-value stream 'line))
;;; Return the current column number of STREAM (the COLUMN slot, which starts
;;; at 0 and is maintained by READ-RUNE and UNREAD-RUNE).
(defun xstream-column-number (stream) (slot-value stream 'column))
;;; Return the encoding of STREAM, i.e. its STREAM-EXTERNAL-FORMAT.
(defun xstream-encoding (stream) (stream-external-format stream))
;;; The encoding will have already been detected, but it is checked against
;;; the declared encoding here.
(defun (setf xstream-encoding) (declared-encoding stream)
  "Check DECLARED-ENCODING against the encoding already in use on STREAM.

The stream's encoding is never changed.  A byte-order-neutral declaration
such as UTF-16 or UCS-4 is treated as matching the detected endian-specific
encoding.  If the declared encoding does not match, a warning is signalled.
Returns DECLARED-ENCODING."
  (let* ((initial-encoding (xstream-encoding stream))
         (canonical-encoding
          (cond ((and (eq initial-encoding :utf-16le)
                      (member declared-encoding
                              '(:utf-16 :utf16 :utf-16le :utf16le)
                              :test 'string-equal))
                 :utf-16le)
                ((and (eq initial-encoding :utf-16be)
                      (member declared-encoding
                              '(:utf-16 :utf16 :utf-16be :utf16be)
                              :test 'string-equal))
                 :utf-16be)
                ;; The UCS-4 branches must yield the same keyword spelling as
                ;; the detected encoding they are compared with below.  The
                ;; previous :ucs4-be / :ucs4-le never matched :ucs-4be /
                ;; :ucs-4le and so caused a spurious warning.
                ((and (eq initial-encoding :ucs-4be)
                      (member declared-encoding
                              '(:ucs-4 :ucs4 :ucs-4be :ucs4be)
                              :test 'string-equal))
                 :ucs-4be)
                ((and (eq initial-encoding :ucs-4le)
                      (member declared-encoding
                              '(:ucs-4 :ucs4 :ucs-4le :ucs4le)
                              :test 'string-equal))
                 :ucs-4le)
                (t
                 declared-encoding))))
    (unless (string-equal initial-encoding canonical-encoding)
      (warn "Unable to change xstream encoding from ~S to ~S (~S)~%"
            initial-encoding declared-encoding canonical-encoding))
    declared-encoding))
;;; ystream - a rune output stream.
;;; In this implementation YSTREAM is simply an alias for the STREAM type.
(deftype ystream () 'stream)
;;; Return the current output column of STREAM as reported by
;;; ext:line-column.
(defun ystream-column (stream) (ext:line-column stream))
;;; Write the single rune RUNE to STREAM with WRITE-CHAR.
(definline write-rune (rune stream) (declare (inline write-char)) (write-char rune stream))
;;; Write the whole rod ROD to STREAM with WRITE-STRING.
(defun write-rod (rod stream) (declare (type rod rod) (type stream stream)) (write-string rod stream))
;;; Return a new ystream that collects output in memory; it is a plain
;;; string output stream.  CLOSE-YSTREAM has a string-output-stream branch
;;; that returns the accumulated string.
(defun make-rod-ystream () (make-string-output-stream))
(defun close-ystream (stream)
  "Finish output on the ystream STREAM and return its result.

For a string output stream the accumulated string is returned.  For a
character conversion output stream the stream is closed; if it was writing
into a byte output stream the collected bytes are returned, otherwise STREAM
itself is returned.  Any other kind of stream signals an error via ETYPECASE."
  (etypecase stream
    (ext:string-output-stream
     (get-output-stream-string stream))
    (ext:character-conversion-output-stream
     ;; Fetch the underlying target before closing the conversion stream.
     (let ((sink (slot-value stream 'stream)))
       (close stream)
       (cond ((typep sink 'ext:byte-output-stream)
              (ext:get-output-stream-bytes sink))
             (t
              stream))))))
;;;; CHARACTER-STREAM-YSTREAM
;;; Return TARGET-STREAM itself; no wrapper is created.
(defun make-character-stream-ystream (target-stream) target-stream)
;;;; OCTET-VECTOR-YSTREAM
(defun make-octet-vector-ystream ()
  "Return a ystream that encodes characters as UTF-8 into a fresh in-memory
byte output stream.  CLOSE-YSTREAM returns the collected bytes for it."
  (ext:make-character-conversion-stream (ext:make-byte-output-stream)
                                        :output t
                                        :external-format :utf-8
                                        :close-stream-p t))
;;;; OCTET-STREAM-YSTREAM
;;; Return a ystream that encodes characters as UTF-8 and writes the
;;; resulting octets to OS-STREAM.  The wrapper is created with
;;; :close-stream-p t.
(defun make-octet-stream-ystream (os-stream) (ext:make-character-conversion-stream os-stream :output t :external-format :utf-8 :close-stream-p t))
Quoting Douglas Crosher (dtc@scieneer.com):
o Add support for the Scieneer CL.
The Scieneer CL has a very fast stream implementation which is comparable in performance to the runes implementation so CXML on the SCL can use CL streams rather than xstreams and ystreams - enabled via the :x&y-streams-are-stream feature. This also enables support for a wider range of encodings.
Hmm. When I came to Closure XML, the package glisp/runes was full of implementation-specific code, and I am glad that most read-time conditionals are gone now. So I hesitate to add more hacks for specific implementations.
On the other hand, use of normal streams is the right direction to take in the long term.
For now I have committed your patch as-is, although I would prefer to see other implementations switched to the same strategy if possible. At least Allegro should be easy to support in this way.
Ideally that should be done in a way that minimizes the amount of copy&paste. While the subclassing of STREAM is obviously implementation-dependent (but probably easy to do for most lisps, using whatever version of DEFCLASS or DEFSTRUCT necessary), the rest of the code should be shared.
That leaves the patch with one remaining problem, which had stopped me from pursuing this idea in the past, namely the lack of (SETF STREAM-EXTERNAL-FORMAT) on most Lisps.
I take it that Scieneer does not have (setf s-e-f) either? How does ext:make-xml-character-conversion-stream work?
The Scieneer CL also supports http and https URLs for pathnames so can open such external files without the need for an entity resolver.
Nice.
Applied using #+cxml-system::uri-is-namestring instead of #+scl for clarity. The feature is activated automatically in cxml.asd.
#-x&y-streams-are-stream (:file "encodings")
Where do you add :x&y-streams-are-stream to *features*?
Thank you for integrating the Scieneer CL support.
David Lichteblau wrote: ...
I take it that Scieneer does not have (setf s-e-f) either? How does ext:make-xml-character-conversion-stream work?
The stream-external-format can not be changed reliably on the buffered streams used for CXML because content may have already been consumed and converted. The runes xstream has a 'speed' setting to avoid this problem but this is not implemented in the SCL native buffered streams.
The function 'ext:make-xml-character-conversion-stream peeks ahead multiple bytes to parse the byte order mark and the document declaration encoding and then returns an appropriate character stream that reads from the start of the document. The runes 'figure-encoding function could be expanded to do the same, reading the document declaration to determine the encoding, and then the xstream 'speed' change would not be necessary.
#-x&y-streams-are-stream (:file "encodings")
Where do you add :x&y-streams-are-stream to *features*?
Perhaps this should be the default for the SCL; patch attached. The runes implementation also works, just a little slower and with fewer supported encodings.
Regards Douglas Crosher
Index: runes.asd =================================================================== RCS file: /project/cxml/cvsroot/cxml/runes.asd,v retrieving revision 1.2 diff -u -r1.2 runes.asd --- runes.asd 16 Jun 2007 11:27:18 -0000 1.2 +++ runes.asd 17 Jun 2007 05:31:19 -0000 @@ -37,6 +37,10 @@ #+rune-is-character (format t "~&;;; Building Closure with CHARACTER RUNES~%")
+;;; Use native streams for the Scieneer CL. +#+scl +(pushnew :x&y-streams-are-stream *features*) + (defsystem :runes :default-component-class closure-source-file :pathname (merge-pathnames