Author: eweitz
Date: Sat May 24 19:34:51 2008
New Revision: 53
Added:
branches/edi/conditions.lisp
Modified:
branches/edi/encode.lisp
branches/edi/output.lisp
branches/edi/strings.lisp
branches/edi/test/test.lisp
Log:
Faster encoding - passes all tests on LW
Added: branches/edi/conditions.lisp
==============================================================================
--- (empty file)
+++ branches/edi/conditions.lisp Sat May 24 19:34:51 2008
@@ -0,0 +1,108 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $
+
+;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;; * Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+
+;;; * Redistributions in binary form must reproduce the above
+;;; copyright notice, this list of conditions and the following
+;;; disclaimer in the documentation and/or other materials
+;;; provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :flexi-streams)
+
+(define-condition flexi-stream-error (stream-error)
+ ()
+ (:documentation "Superclass for all errors related to flexi
+streams."))
+
+(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition)
+ ()
+ (:documentation "Like FLEXI-STREAM-ERROR but with formatting
+capabilities."))
+
+(define-condition flexi-stream-element-type-error (flexi-stream-error)
+ ((element-type :initarg :element-type
+ :reader flexi-stream-element-type-error-element-type))
+ (:report (lambda (condition stream)
+ (format stream "Element type ~S not allowed."
+ (flexi-stream-element-type-error-element-type condition))))
+ (:documentation "Errors of this type are signalled if the flexi
+stream has a wrong element type."))
+
+(define-condition flexi-stream-out-of-sync-error (flexi-stream-error)
+ ()
+ (:report (lambda (condition stream)
+ (declare (ignore condition))
+ (format stream "Stream out of sync from previous
+lookahead, couldn't rewind.")))
+ (:documentation "This can happen if you're trying to write to an IO
+stream which had prior to that `looked ahead' while reading and now
+can't `rewind' to the octet where you /should/ be."))
+
+(define-condition in-memory-stream-error (stream-error)
+ ()
+ (:documentation "Superclass for all errors related to
+IN-MEMORY streams."))
+
+(define-condition in-memory-stream-simple-error (in-memory-stream-error simple-condition)
+ ()
+ (:documentation "Like IN-MEMORY-STREAM-ERROR but with formatting
+capabilities."))
+
+(define-condition in-memory-stream-closed-error (in-memory-stream-error)
+ ()
+ (:report (lambda (condition stream)
+ (format stream "~S is closed."
+ (stream-error-stream condition))))
+ (:documentation "An error that is signalled when someone is trying
+to read from or write to a closed IN-MEMORY stream."))
+
+(define-condition in-memory-stream-position-spec-error (in-memory-stream-simple-error)
+ ((position-spec :initarg :position-spec
+ :reader in-memory-stream-position-spec-error-position-spec))
+ (:documentation "Errors of this type are signalled if an erroneous
+position spec is used in conjunction with FILE-POSITION."))
+
+(define-condition external-format-error ()
+ ((external-format :initarg :external-format
+ :initform nil
+ :reader external-format-error-external-format))
+ (:documentation "Superclass for all errors related to external
+formats."))
+
+(define-condition external-format-simple-error (external-format-error simple-condition)
+ ()
+ (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting
+capabilities."))
+
+(define-condition external-format-encoding-error (external-format-simple-error)
+ ()
+ (:documentation "Errors of this type are signalled if there is an
+encoding problem."))
+
+(defun signal-encoding-error (external-format format-control &rest format-args)
+ "Convenience function similar to ERROR to signal conditions of type
+EXTERNAL-FORMAT-ENCODING-ERROR."
+ (error 'external-format-encoding-error
+ :format-control format-control
+ :format-arguments format-args
+ :external-format external-format))
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp (original)
+++ branches/edi/encode.lisp Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.12 2008/05/20 23:01:50 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.16 2008/05/24 23:27:23 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,6 +29,125 @@
(in-package :flexi-streams)
+(defgeneric compute-number-of-octets (format sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "Computes the exact number of octets required to
+encode the sequence of characters in SEQUENCE from START to END using
+the external format FORMAT."))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (- end start))
+
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((< char-code #x80) 1)
+ ((< char-code #x800) 2)
+ ((< char-code #x10000) 3)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
+ ((< char-code #x80) 1)
+ ((< char-code #x800) 2)
+ ((< char-code #x10000) 3)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+ ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+ ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (* 4 (- end start)))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (+ (call-next-method)
+ (* (case (external-format-name format)
+ (:utf-32 4)
+ (otherwise 1))
+ (count #\Newline sequence :start start :end end :test #'char=))))
+
(defgeneric char-to-octets (format char writer)
(declare #.*standard-optimize-settings*)
(:documentation "Converts the character CHAR to a sequence of octets
@@ -37,72 +156,188 @@
repeatedly each octet. The return value of this function is
unspecified."))
-(defmethod char-to-octets ((format flexi-latin-1-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((octet (char-code char)))
+(defgeneric write-sequence* (format stream sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "A generic function which dispatches on the external
+format and does the real work for STREAM-WRITE-SEQUENCE."))
+
+(defgeneric string-to-octets* (format string start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "A generic function which dispatches on the external
+format and does the real work for STRING-TO-OCTETS."))
+
+(defmacro define-sequence-writers ((format-class) &body body)
+ "Utility macro which defines methods for WRITE-SEQUENCE* and
+STRING-TO-OCTET* for the class FORMAT-CLASS. For BODY see the
+docstring of DEFINE-CHAR-ENCODERS."
+ `(progn
+ (defmethod write-sequence* ((format ,format-class) stream sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end))
+ (with-accessors ((column flexi-stream-column))
+ stream
+ (let* ((octet-seen-p nil)
+ (buffer-pos 0)
+ ;; estimate should be good enough...
+ (factor (encoding-factor format))
+ ;; we don't want arbitrarily large buffer, do we?
+ (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
+ (buffer (make-octet-buffer buffer-size)))
+ (declare (fixnum buffer-pos buffer-size)
+ (boolean octet-seen-p)
+ (type (array octet *) buffer))
+ (macrolet ((octet-writer (form)
+ `(write-octet ,form)))
+ (labels ((flush-buffer ()
+ "Sends all octets in BUFFER to the underlying stream."
+ (write-sequence buffer stream :end buffer-pos)
+ (setq buffer-pos 0))
+ (write-octet (octet)
+ "Adds one octet to the buffer and flushes it if necessary."
+ (declare (octet octet))
+ (when (>= buffer-pos buffer-size)
+ (flush-buffer))
+ (setf (aref buffer buffer-pos) octet)
+ (incf buffer-pos))
+ (write-object (object)
+ "Dispatches to WRITE-OCTET or WRITE-CHARACTER
+depending on the type of OBJECT."
+ (etypecase object
+ (octet (setq octet-seen-p t)
+ (write-octet object))
+ (character (symbol-macrolet ((char-getter object))
+ ,@body)))))
+ (macrolet ((iterate (&body output-forms)
+ "An unhygienic macro to implement the actual
+iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
+sequence element and put its octet representation into the buffer."
+ `(loop for index of-type fixnum from start below end
+ do (progn ,@output-forms)
+ finally (when (plusp buffer-pos)
+ (flush-buffer)))))
+ (etypecase sequence
+ (string (iterate
+ (symbol-macrolet ((char-getter (char sequence index)))
+ ,@body)))
+ (array (iterate
+ (symbol-macrolet ((char-getter (aref sequence index)))
+ ,@body)))
+ (list (iterate (write-object (nth index sequence))))))
+ ;; update the column slot, setting it to NIL if we sent
+ ;; octets
+ (setq column
+ (cond (octet-seen-p nil)
+ (t (let ((last-newline-pos (position #\Newline sequence
+ :test #'char=
+ :start start
+ :end end
+ :from-end t)))
+ (cond (last-newline-pos (- end last-newline-pos 1))
+ (column (+ column (- end start)))))))))))))
+ (defmethod string-to-octets* ((format ,format-class) string start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end) (string string))
+ (let ((octets (make-array (compute-number-of-octets format string start end)
+ :element-type 'octet))
+ (j 0))
+ (declare (fixnum j))
+ (loop for i of-type fixnum from start below end do
+ (macrolet ((octet-writer (form)
+ `(progn
+ (setf (aref (the (array octet *) octets) j) ,form)
+ (incf j))))
+ (symbol-macrolet ((char-getter (char string i)))
+ (progn ,@body))))
+ octets))))
+
+;; char-getter can be called more than once - no side effects
+(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body)
+ "Utility macro which defines several encoding-related methods for
+the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where
+it is assumed that CR-FORMAT-CLASS is the same encoding as
+FORMAT-CLASS but with CR line endings and similar for
+CRLF-FORMAT-CLASS. BODY is a code template for the code to convert
+one character to octets. BODY must contain a symbol CHAR-GETTER
+representing the form which is used to obtain the character and a
+forms like \(OCTET-WRITE <thing>) to write the octet <thing>. The
+CHAR-GETTER form might be called more than once."
+ (let ((body `((locally
+ (declare #.*fixnum-optimize-settings*)
+ ,@body))))
+ `(progn
+ (defmethod char-to-octets ((format ,format-class) char writer)
+ (declare (character char) (function writer))
+ (symbol-macrolet ((char-getter char))
+ (macrolet ((octet-writer (form)
+ `(funcall writer ,form)))
+ ,@body)))
+ (define-sequence-writers (,format-class) ,@body)
+ (define-sequence-writers (,cr-format-class)
+ ,@(sublis `((char-getter . ,(with-unique-names (char)
+ `(let ((,char char-getter))
+ (declare (character ,char))
+ (if (char= ,char #\Newline)
+ #\Return
+ ,char)))))
+ body))
+ (define-sequence-writers (,crlf-format-class)
+ ,(with-unique-names (char write-char)
+ `(flet ((,write-char (,char)
+ ,@(sublis `((char-getter . ,char)) body)))
+ (let ((,char char-getter))
+ (declare (character ,char))
+ (cond ((char= ,char #\Newline)
+ (,write-char #\Return)
+ (,write-char #\Newline))
+ (t (,write-char ,char))))))))))
+
+(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
+ (let ((octet (char-code char-getter)))
(when (> octet 255)
- (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char octet))
- (funcall writer octet)))
+ (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet))
+ (octet-writer octet)))
-(defmethod char-to-octets ((format flexi-ascii-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((octet (char-code char)))
+(define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
+ (let ((octet (char-code char-getter)))
(when (> octet 127)
- (signal-encoding-error format "~S (code ~A) is not an ASCII character." char octet))
- (funcall writer octet)))
+ (signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet))
+ (octet-writer octet)))
-(defmethod char-to-octets ((format flexi-8-bit-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
+(define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
(with-accessors ((encoding-hash external-format-encoding-hash))
format
- (let ((octet (gethash (char-code char) encoding-hash)))
+ (let ((octet (gethash (char-code char-getter) encoding-hash)))
(unless octet
- (signal-encoding-error format "~S (code ~A) is not in this encoding." char octet))
- (funcall writer octet))))
+ (signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet))
+ (octet-writer octet))))
-(defmethod char-to-octets ((format flexi-utf-8-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((char-code (char-code char)))
+(define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
+ (let ((char-code (char-code char-getter)))
(tagbody
(cond ((< char-code #x80)
- (funcall writer char-code)
+ (octet-writer char-code)
(go zero))
((< char-code #x800)
- (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code)))
+ (octet-writer (logior #b11000000 (ldb (byte 5 6) char-code)))
(go one))
((< char-code #x10000)
- (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code)))
+ (octet-writer (logior #b11100000 (ldb (byte 4 12) char-code)))
(go two))
- ((< char-code #x200000)
- (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code)))
- (go three))
- ((< char-code #x4000000)
- (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code)))
- (go four))
- (t (funcall writer (if (logbitp 30 char-code) #b11111101 #b11111100))))
- (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code)))
- four
- (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code)))
- three
- (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code)))
+ (t
+ (octet-writer (logior #b11110000 (ldb (byte 3 18) char-code)))))
+ (octet-writer (logior #b10000000 (ldb (byte 6 12) char-code)))
two
- (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code)))
+ (octet-writer (logior #b10000000 (ldb (byte 6 6) char-code)))
one
- (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code)))
+ (octet-writer (logior #b10000000 (ldb (byte 6 0) char-code)))
zero)))
-(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
+(define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
(flet ((write-word (word)
- (funcall writer (ldb (byte 8 0) word))
- (funcall writer (ldb (byte 8 8) word))))
+ (octet-writer (ldb (byte 8 0) word))
+ (octet-writer (ldb (byte 8 8) word))))
(declare (inline write-word))
- (let ((char-code (char-code char)))
+ (let ((char-code (char-code char-getter)))
(declare (type char-code-integer char-code))
(cond ((< char-code #x10000)
(write-word char-code))
@@ -110,14 +345,12 @@
(write-word (logior #xd800 (ldb (byte 10 10) char-code)))
(write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
-(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
+(define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
(flet ((write-word (word)
- (funcall writer (ldb (byte 8 8) word))
- (funcall writer (ldb (byte 8 0) word))))
+ (octet-writer (ldb (byte 8 8) word))
+ (octet-writer (ldb (byte 8 0) word))))
(declare (inline write-word))
- (let ((char-code (char-code char)))
+ (let ((char-code (char-code char-getter)))
(declare (type char-code-integer char-code))
(cond ((< char-code #x10000)
(write-word char-code))
@@ -125,23 +358,19 @@
(write-word (logior #xd800 (ldb (byte 10 10) char-code)))
(write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
-(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((char-code (char-code char)))
- (funcall writer (ldb (byte 8 0) char-code))
- (funcall writer (ldb (byte 8 8) char-code))
- (funcall writer (ldb (byte 8 16) char-code))
- (funcall writer (ldb (byte 8 24) char-code))))
-
-(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((char-code (char-code char)))
- (funcall writer (ldb (byte 8 24) char-code))
- (funcall writer (ldb (byte 8 16) char-code))
- (funcall writer (ldb (byte 8 8) char-code))
- (funcall writer (ldb (byte 8 0) char-code))))
+(define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
+ (let ((char-code (char-code char-getter)))
+ (octet-writer (ldb (byte 8 0) char-code))
+ (octet-writer (ldb (byte 8 8) char-code))
+ (octet-writer (ldb (byte 8 16) char-code))
+ (octet-writer (ldb (byte 8 24) char-code))))
+
+(define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
+ (let ((char-code (char-code char-getter)))
+ (octet-writer (ldb (byte 8 24) char-code))
+ (octet-writer (ldb (byte 8 16) char-code))
+ (octet-writer (ldb (byte 8 8) char-code))
+ (octet-writer (ldb (byte 8 0) char-code))))
(defmethod char-to-octets ((format flexi-cr-mixin) char writer)
(declare #.*fixnum-optimize-settings*)
Modified: branches/edi/output.lisp
==============================================================================
--- branches/edi/output.lisp (original)
+++ branches/edi/output.lisp Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.63 2008/05/23 14:43:09 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.65 2008/05/24 23:15:25 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -129,7 +129,7 @@
;; needed for AllegroCL - grrr...
(stream-write-char stream #\Newline))
-(defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key)
+(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key)
"An optimized version which uses a buffer underneath. The function
can accepts characters as well as octets and it decides what to do
based on the element type of the sequence \(if possible) or on the
@@ -141,7 +141,7 @@
(with-accessors ((column flexi-stream-column)
(external-format flexi-stream-external-format)
(stream flexi-stream-stream))
- stream
+ flexi-output-stream
(when (>= start end)
(return-from stream-write-sequence sequence))
(when (and (vectorp sequence)
@@ -151,59 +151,8 @@
(setq column nil)
(return-from stream-write-sequence
(write-sequence sequence stream :start start :end end)))
- (let* ((octet-seen-p nil)
- (buffer-pos 0)
- (factor (encoding-factor external-format))
- (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
- (buffer (make-octet-buffer buffer-size)))
- (declare (fixnum buffer-pos buffer-size)
- (boolean octet-seen-p)
- (type (array octet *) buffer))
- (labels ((flush-buffer ()
- "Sends all octets in BUFFER to the underlying stream."
- (write-sequence buffer stream :end buffer-pos)
- (setq buffer-pos 0))
- (write-octet (octet)
- "Adds one octet to the buffer and flush it if necessary."
- (declare (octet octet))
- (when (>= buffer-pos buffer-size)
- (flush-buffer))
- (setf (aref buffer buffer-pos) octet)
- (incf buffer-pos))
- (write-character (char)
- "Adds the octets representing the character CHAR to the buffer."
- (char-to-octets external-format char #'write-octet))
- (write-object (object)
- "Dispatches to WRITE-OCTET or WRITE-CHARACTER
-depending on the type of OBJECT."
- (etypecase object
- (octet (setq octet-seen-p t)
- (write-octet object))
- (character (write-character object)))))
- (declare (dynamic-extent (function write-octet)))
- (macrolet ((iterate (output-form)
- "An unhygienic macro to implement the actual
-iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
-sequence element and put its octet representation into the buffer."
- `(loop for index of-type fixnum from start below end
- do ,output-form
- finally (when (plusp buffer-pos)
- (flush-buffer)))))
- (etypecase sequence
- (string (iterate (write-character (char sequence index))))
- (array (iterate (write-object (aref sequence index))))
- (list (iterate (write-object (nth index sequence)))))
- ;; update the column slot, setting it to NIL if we sent
- ;; octets
- (setq column
- (cond (octet-seen-p nil)
- (t (let ((last-newline-pos (position #\Newline sequence
- :test #'char=
- :start start
- :end end
- :from-end t)))
- (cond (last-newline-pos (- end last-newline-pos 1))
- (column (+ column (- end start))))))))))))
+ ;; otherwise hand over to the external format to do the work
+ (write-sequence* external-format flexi-output-stream sequence start end))
sequence)
(defmethod stream-write-string ((stream flexi-output-stream) string
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp (original)
+++ branches/edi/strings.lisp Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.22 2008/05/21 01:43:43 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.24 2008/05/24 23:15:25 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -36,56 +36,10 @@
octets corresponding to the external format designated by
EXTERNAL-FORMAT."
(declare #.*standard-optimize-settings*)
- (declare (fixnum start end) (string string))
+ (declare (string string))
(setq external-format (maybe-convert-external-format external-format))
- (let ((factor (encoding-factor external-format))
- (length (- end start)))
- (declare (fixnum length))
- (etypecase factor
- (integer
- (let ((octets (make-array (* factor length) :element-type 'octet))
- (j 0))
- (declare (fixnum j))
- (flet ((writer (octet)
- (declare (octet octet))
- (setf (aref (the (array octet *) octets) j) octet)
- (incf j)))
- (declare (dynamic-extent (function writer)))
- (loop for i of-type fixnum from start below end do
- (char-to-octets external-format
- (char string i)
- #'writer)))
- octets))
- (double-float
- ;; this is a bit clunky but hopefully a bit more efficient than
- ;; using VECTOR-PUSH-EXTEND
- (let* ((octets-length (ceiling (* factor length)))
- (octets (make-array octets-length
- :element-type 'octet
- :fill-pointer t
- :adjustable t))
- (i start)
- (j 0))
- (declare (fixnum i j octets-length)
- (double-float factor))
- (flet ((writer (octet)
- (declare (octet octet))
- (when (>= j octets-length)
- (setq factor (* factor 2.0d0))
- (incf octets-length (the fixnum (ceiling (* factor (- end i)))))
- (adjust-array octets octets-length :fill-pointer t))
- (setf (aref (the (array octet *) octets) j) octet)
- (incf j)))
- (declare (dynamic-extent (function writer)))
- (loop
- (when (>= i end)
- (return))
- (char-to-octets external-format
- (char string i)
- #'writer)
- (incf i))
- (setf (fill-pointer octets) j)
- octets))))))
+ ;; the external format knows how to do it...
+ (string-to-octets* external-format string start end))
(defun octets-to-string (sequence &key
(external-format :latin1)
Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp (original)
+++ branches/edi/test/test.lisp Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.31 2008/05/20 23:01:53 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.32 2008/05/21 17:51:42 edi Exp $
;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
@@ -329,10 +329,11 @@
(loop for i below (length seq1)
always (eql (elt seq1 i) (elt seq2 i)))))
-(defun read-sequence-test (pathspec external-format)
- "Several tests to confirm that READ-SEQUENCE behaves as expected."
- (with-test ((format nil "READ-SEQUENCE tests with format ~S."
- (flex::normalize-external-format external-format)))
+(defun sequence-test (pathspec external-format)
+ "Several tests to confirm that READ-SEQUENCE and WRITE-SEQUENCE
+behave as expected."
+ (with-test ((format nil "Sequence tests with format ~S and file ~A."
+ (flex::normalize-external-format external-format) pathspec))
(let* ((full-path (merge-pathnames pathspec *this-file*))
(file-string (file-as-string full-path external-format))
(string-length (length file-string))
@@ -397,7 +398,33 @@
(check (sequence-equal array (subseq file-string 25 (- string-length 25))))
(check (sequence-equal (loop repeat 25
collect (read-char in))
- (subseq file-string (- string-length 25)))))))))
+ (subseq file-string (- string-length 25))))))
+ (let ((path-out (ensure-directories-exist (merge-pathnames pathspec *tmp-dir*))))
+ (with-open-file (out path-out
+ :direction :output
+ :if-exists :supersede
+ :element-type 'octet)
+ (let ((out (make-flexi-stream out :external-format external-format)))
+ (write-sequence octets out)))
+ (check (file-equal full-path path-out))
+ (with-open-file (out path-out
+ :direction :output
+ :if-exists :supersede
+ :element-type 'octet)
+ (let ((out (make-flexi-stream out :external-format external-format)))
+ (write-sequence file-string out)))
+ (check (file-equal full-path path-out))
+ (with-open-file (out path-out
+ :direction :output
+ :if-exists :supersede
+ :element-type 'octet)
+ (let ((out (make-flexi-stream out :external-format external-format)))
+ (write-sequence file-string out :end 100)
+ (write-sequence octets out
+ :start (length (string-to-octets file-string
+ :external-format external-format
+ :end 100)))))
+ (check (file-equal full-path path-out))))))
(defmacro using-values ((&rest values) &body body)
"Executes BODY and feeds an element from VALUES to the USE-VALUE
@@ -544,7 +571,7 @@
nconc (create-test-combinations file-name symbols t))))
(incf no-tests (length read-sequence-test-args-list))
(dolist (args read-sequence-test-args-list)
- (apply 'read-sequence-test args)))
+ (apply 'sequence-test args)))
(incf no-tests)
(error-handling-test)
(incf no-tests)