Update of /project/cxml/cvsroot/cxml/runes In directory clnet:/tmp/cvs-serv20513/runes
Modified Files: package.lisp ystream.lisp Added Files: stream-scl.lisp Log Message: SCL support (thanks to Douglas Crosher). Includes support for implementations where URIs are valid namestrings, and a mode where normal streams are used instead of xstreams and ystreams (albeit both SCL-specific at this point).
--- /project/cxml/cvsroot/cxml/runes/package.lisp 2006/12/02 13:21:36 1.7 +++ /project/cxml/cvsroot/cxml/runes/package.lisp 2007/06/16 11:27:19 1.8 @@ -79,7 +79,11 @@ #:make-string-ystream/utf8 ;; #+rune-is-integer #:make-character-stream-ystream/utf8 - #:runes-to-utf8/adjustable-string)) + #:runes-to-utf8/adjustable-string + + #:rod-to-utf8-string + #:utf8-string-to-rod + #:make-octet-input-stream))
(defpackage :utf8-runes (:use :cl) --- /project/cxml/cvsroot/cxml/runes/ystream.lisp 2006/12/02 13:21:36 1.5 +++ /project/cxml/cvsroot/cxml/runes/ystream.lisp 2007/06/16 11:27:19 1.6 @@ -248,3 +248,50 @@
(defmethod close-ystream ((ystream string-ystream/utf8)) (get-output-stream-string (ystream-os-stream ystream)))) + + +;;;; helper functions + +(defun rod-to-utf8-string (rod) + (let ((out (make-buffer :element-type 'character))) + (runes-to-utf8/adjustable-string out rod (length rod)) + out)) + +(defun utf8-string-to-rod (str) + (let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str)) + (buffer (make-array (length bytes) :element-type '(unsigned-byte 16))) + (n (runes-encoding:decode-sequence + :utf-8 bytes 0 (length bytes) buffer 0 0 nil)) + (result (make-array n :element-type 'rune))) + (map-into result #'code-rune buffer) + result)) + +(defclass octet-input-stream + (trivial-gray-stream-mixin fundamental-binary-input-stream) + ((octets :initarg :octets) + (pos :initform 0))) + +(defmethod close ((stream octet-input-stream) &key abort) + (declare (ignore abort)) + (open-stream-p stream)) + +(defmethod stream-read-byte ((stream octet-input-stream)) + (with-slots (octets pos) stream + (if (>= pos (length octets)) + :eof + (prog1 + (elt octets pos) + (incf pos))))) + +(defmethod stream-read-sequence + ((stream octet-input-stream) sequence start end &key &allow-other-keys) + (with-slots (octets pos) stream + (let* ((length (min (- end start) (- (length octets) pos))) + (end1 (+ start length)) + (end2 (+ pos length))) + (replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2) + (setf pos end2) + end1))) + +(defun make-octet-input-stream (octets) + (make-instance 'octet-input-stream :octets octets))
--- /project/cxml/cvsroot/cxml/runes/stream-scl.lisp 2007/06/16 11:27:19 NONE +++ /project/cxml/cvsroot/cxml/runes/stream-scl.lisp 2007/06/16 11:27:19 1.1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*- ;;; --------------------------------------------------------------------------- ;;; Title: Fast streams ;;; Created: 1999-07-17 ;;; Author: Douglas Crosher ;;; License: Lisp-LGPL (See file COPYING for details). ;;; --------------------------------------------------------------------------- ;;; (c) copyright 2007 by Douglas Crosher
;;; This library is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Library General Public ;;; License as published by the Free Software Foundation; either ;;; version 2 of the License, or (at your option) any later version. ;;; ;;; This library is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Library General Public License for more details. ;;; ;;; You should have received a copy of the GNU Library General Public ;;; License along with this library; if not, write to the ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;;; Boston, MA 02111-1307 USA.
(in-package :runes)
(eval-when (:compile-toplevel :load-toplevel :execute) (defparameter *fast* '(optimize (speed 3) (safety 3))))
(deftype runes-encoding:encoding-error () 'ext:character-conversion-error)
;;; xstream
(eval-when (:compile-toplevel :load-toplevel :execute)
(defclass xstream (ext:character-stream) ((name :initarg :name :initform nil :accessor xstream-name) (column :initarg :column :initform 0) (line :initarg :line :initform 1) (unread-column :initarg :unread-column :initform 0)))
(defclass eol-conversion-xstream (lisp::eol-conversion-input-stream xstream) ())
) ; eval-when
(defun make-eol-conversion-xstream (source-stream) "Returns a character stream that conversion CR-LF pairs and lone CR characters into single linefeed character." (declare (type stream source-stream)) (let ((stream (ext:make-eol-conversion-stream source-stream :input t :close-stream-p t))) (change-class stream 'eol-conversion-xstream)))
(definline xstream-p (stream) (typep stream 'xstream))
(defun close-xstream (input) (close input))
(definline read-rune (input) (declare (type stream input) (inline read-char) #.*fast*) (let ((char (read-char input nil :eof))) (cond ((member char '(#\UFFFE #\UFFFF)) ;; These characters are illegal within XML documents. (simple-error 'ext:character-conversion-error "~@<Illegal XML document character: ~S~:@>" char)) ((eql char #\linefeed) (setf (slot-value input 'unread-column) (slot-value input 'column)) (setf (slot-value input 'column) 0) (incf (the kernel:index (slot-value input 'line)))) (t (incf (the kernel:index (slot-value input 'column))))) char))
(definline peek-rune (input) (declare (type stream input) (inline peek-char) #.*fast*) (peek-char nil input nil :eof))
(definline consume-rune (input) (declare (type stream input) (inline read-rune) #.*fast*) (read-rune input) nil)
(definline unread-rune (rune input) (declare (type stream input) (inline unread-char) #.*fast*) (unread-char rune input) (cond ((eql rune #\linefeed) (setf (slot-value input 'column) (slot-value input 'unread-column)) (setf (slot-value input 'unread-column) 0) (decf (the kernel:index (slot-value input 'line)))) (t (decf (the kernel:index (slot-value input 'column))))) nil)
(defun fread-rune (input) (read-rune input))
(defun fpeek-rune (input) (peek-rune input))
(defun xstream-position (input) (file-position input))
(defun runes-encoding:find-encoding (encoding) encoding)
(defun make-xstream (os-stream &key name (speed 8192) (initial-speed 1) (initial-encoding :guess)) (declare (ignore speed)) (assert (eql initial-speed 1)) (assert (eq initial-encoding :guess)) (let* ((stream (ext:make-xml-character-conversion-stream os-stream :input t :close-stream-p t)) (xstream (make-eol-conversion-xstream stream))) (setf (xstream-name xstream) name) xstream))
(defclass xstream-string-input-stream (lisp::string-input-stream xstream) ())
(defun make-rod-xstream (string &key name) (declare (type string string)) (let ((stream (make-string-input-stream string))) (change-class stream 'xstream-string-input-stream :name name)))
;;; already at 'full speed' so just return the buffer size. (defun set-to-full-speed (stream) (length (ext:stream-in-buffer stream)))
(defun xstream-speed (stream) (length (ext:stream-in-buffer stream)))
(defun xstream-line-number (stream) (slot-value stream 'line))
(defun xstream-column-number (stream) (slot-value stream 'column))
(defun xstream-encoding (stream) (stream-external-format stream))
;;; the encoding will have already been detected, but it is checked against the ;;; declared encoding here. (defun (setf xstream-encoding) (declared-encoding stream) (let* ((initial-encoding (xstream-encoding stream)) (canonical-encoding (cond ((and (eq initial-encoding :utf-16le) (member declared-encoding '(:utf-16 :utf16 :utf-16le :utf16le) :test 'string-equal)) :utf-16le) ((and (eq initial-encoding :utf-16be) (member declared-encoding '(:utf-16 :utf16 :utf-16be :utf16be) :test 'string-equal)) :utf-16be) ((and (eq initial-encoding :ucs-4be) (member declared-encoding '(:ucs-4 :ucs4 :ucs-4be :ucs4be) :test 'string-equal)) :ucs4-be) ((and (eq initial-encoding :ucs-4le) (member declared-encoding '(:ucs-4 :ucs4 :ucs-4le :ucs4le) :test 'string-equal)) :ucs4-le) (t declared-encoding)))) (unless (string-equal initial-encoding canonical-encoding) (warn "Unable to change xstream encoding from ~S to ~S (~S)~%" initial-encoding declared-encoding canonical-encoding)) declared-encoding))
;;; ystream - a run output stream.
(deftype ystream () 'stream)
(defun ystream-column (stream) (ext:line-column stream))
(definline write-rune (rune stream) (declare (inline write-char)) (write-char rune stream))
(defun write-rod (rod stream) (declare (type rod rod) (type stream stream)) (write-string rod stream))
(defun make-rod-ystream () (make-string-output-stream))
(defun close-ystream (stream) (etypecase stream (ext:string-output-stream (get-output-stream-string stream)) (ext:character-conversion-output-stream (let ((target (slot-value stream 'stream))) (close stream) (if (typep target 'ext:byte-output-stream) (ext:get-output-stream-bytes target) stream)))))
;;;; CHARACTER-STREAM-YSTREAM
(defun make-character-stream-ystream (target-stream) target-stream)
;;;; OCTET-VECTOR-YSTREAM
(defun make-octet-vector-ystream () (let ((target (ext:make-byte-output-stream))) (ext:make-character-conversion-stream target :output t :external-format :utf-8 :close-stream-p t)))
;;;; OCTET-STREAM-YSTREAM
(defun make-octet-stream-ystream (os-stream) (ext:make-character-conversion-stream os-stream :output t :external-format :utf-8 :close-stream-p t))
;;;; helper functions
(defun rod-to-utf8-string (rod) (ext:make-string-from-bytes (ext:make-bytes-from-string rod :utf8) :iso-8859-1))
(defun utf8-string-to-rod (str) (let ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))) (ext:make-string-from-bytes bytes :utf-8)))
(defun make-octet-input-stream (octets) (ext:make-byte-input-stream octets))