Author: eweitz Date: Sat May 24 19:34:51 2008 New Revision: 53
Added: branches/edi/conditions.lisp Modified: branches/edi/encode.lisp branches/edi/output.lisp branches/edi/strings.lisp branches/edi/test/test.lisp Log: Faster encoding - passes all tests on LW
Added: branches/edi/conditions.lisp ============================================================================== --- (empty file) +++ branches/edi/conditions.lisp Sat May 24 19:34:51 2008 @@ -0,0 +1,108 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(define-condition flexi-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to flexi +streams.")) + +(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) + () + (:documentation "Like FLEXI-STREAM-ERROR but with formatting +capabilities.")) + +(define-condition flexi-stream-element-type-error (flexi-stream-error) + ((element-type :initarg :element-type + :reader flexi-stream-element-type-error-element-type)) + (:report (lambda (condition stream) + (format stream "Element type ~S not allowed." + (flexi-stream-element-type-error-element-type condition)))) + (:documentation "Errors of this type are signalled if the flexi +stream has a wrong element type.")) + +(define-condition flexi-stream-out-of-sync-error (flexi-stream-error) + () + (:report (lambda (condition stream) + (declare (ignore condition)) + (format stream "Stream out of sync from previous +lookahead, couldn't rewind."))) + (:documentation "This can happen if you're trying to write to an IO +stream which had prior to that `looked ahead' while reading and now +can't `rewind' to the octet where you /should/ be.")) + +(define-condition in-memory-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +IN-MEMORY streams.")) + +(define-condition in-memory-stream-simple-error (in-memory-stream-error simple-condition) + () + (:documentation "Like IN-MEMORY-STREAM-ERROR but with formatting +capabilities.")) + +(define-condition in-memory-stream-closed-error (in-memory-stream-error) + () + (:report (lambda (condition stream) + (format stream "~S is closed." + (stream-error-stream condition)))) + (:documentation "An error that is signalled when someone is trying +to read from or write to a closed IN-MEMORY stream.")) + +(define-condition in-memory-stream-position-spec-error (in-memory-stream-simple-error) + ((position-spec :initarg :position-spec + :reader in-memory-stream-position-spec-error-position-spec)) + (:documentation "Errors of this type are signalled if an erroneous +position spec is used in conjunction with FILE-POSITION.")) + +(define-condition external-format-error () + ((external-format :initarg :external-format + :initform nil + :reader external-format-error-external-format)) + (:documentation "Superclass for all errors related to external +formats.")) + +(define-condition external-format-simple-error (external-format-error simple-condition) + () + (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting +capabilities.")) + +(define-condition external-format-encoding-error (external-format-simple-error) + () + (:documentation "Errors of this type are signalled if there is an +encoding problem.")) + +(defun signal-encoding-error (external-format format-control &rest format-args) + "Convenience function similar to ERROR to signal conditions of type +EXTERNAL-FORMAT-ENCODING-ERROR." + (error 'external-format-encoding-error + :format-control format-control + :format-arguments format-args + :external-format external-format))
Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sat May 24 19:34:51 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.12 2008/05/20 23:01:50 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.16 2008/05/24 23:27:23 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,6 +29,125 @@
(in-package :flexi-streams)
+(defgeneric compute-number-of-octets (format sequence start end) + (declare #.*standard-optimize-settings*) + (:documentation "Computes the exact number of octets required to +encode the sequence of characters in SEQUENCE from START to END using +the external format FORMAT.")) + +(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (- end start)) + +(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((< char-code #x80) 1) + ((< char-code #x800) 2) + ((< char-code #x10000) 3) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 2) + ((< char-code #x80) 1) + ((< char-code #x800) 2) + ((< char-code #x10000) 3) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 4) + ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 4) + ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (* 4 (- end start))) + +(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (+ (call-next-method) + (* (case (external-format-name format) + (:utf-32 4) + (otherwise 1)) + (count #\Newline sequence :start start :end end :test #'char=)))) + (defgeneric char-to-octets (format char writer) (declare #.*standard-optimize-settings*) (:documentation "Converts the character CHAR to a sequence of octets @@ -37,72 +156,188 @@ repeatedly each octet. The return value of this function is unspecified."))
-(defmethod char-to-octets ((format flexi-latin-1-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((octet (char-code char))) +(defgeneric write-sequence* (format stream sequence start end) + (declare #.*standard-optimize-settings*) + (:documentation "A generic function which dispatches on the external +format and does the real work for STREAM-WRITE-SEQUENCE.")) + +(defgeneric string-to-octets* (format string start end) + (declare #.*standard-optimize-settings*) + (:documentation "A generic function which dispatches on the external +format and does the real work for STRING-TO-OCTETS.")) + +(defmacro define-sequence-writers ((format-class) &body body) + "Utility macro which defines methods for WRITE-SEQUENCE* and +STRING-TO-OCTET* for the class FORMAT-CLASS. For BODY see the +docstring of DEFINE-CHAR-ENCODERS." + `(progn + (defmethod write-sequence* ((format ,format-class) stream sequence start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((column flexi-stream-column)) + stream + (let* ((octet-seen-p nil) + (buffer-pos 0) + ;; estimate should be good enough... + (factor (encoding-factor format)) + ;; we don't want arbitrarily large buffer, do we? + (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) + (buffer (make-octet-buffer buffer-size))) + (declare (fixnum buffer-pos buffer-size) + (boolean octet-seen-p) + (type (array octet *) buffer)) + (macrolet ((octet-writer (form) + `(write-octet ,form))) + (labels ((flush-buffer () + "Sends all octets in BUFFER to the underlying stream." + (write-sequence buffer stream :end buffer-pos) + (setq buffer-pos 0)) + (write-octet (octet) + "Adds one octet to the buffer and flushes it if necessary." + (declare (octet octet)) + (when (>= buffer-pos buffer-size) + (flush-buffer)) + (setf (aref buffer buffer-pos) octet) + (incf buffer-pos)) + (write-object (object) + "Dispatches to WRITE-OCTET or WRITE-CHARACTER +depending on the type of OBJECT." + (etypecase object + (octet (setq octet-seen-p t) + (write-octet object)) + (character (symbol-macrolet ((char-getter object)) + ,@body))))) + (macrolet ((iterate (&body output-forms) + "An unhygienic macro to implement the actual +iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one +sequence element and put its octet representation into the buffer." + `(loop for index of-type fixnum from start below end + do (progn ,@output-forms) + finally (when (plusp buffer-pos) + (flush-buffer))))) + (etypecase sequence + (string (iterate + (symbol-macrolet ((char-getter (char sequence index))) + ,@body))) + (array (iterate + (symbol-macrolet ((char-getter (aref sequence index))) + ,@body))) + (list (iterate (write-object (nth index sequence)))))) + ;; update the column slot, setting it to NIL if we sent + ;; octets + (setq column + (cond (octet-seen-p nil) + (t (let ((last-newline-pos (position #\Newline sequence + :test #'char= + :start start + :end end + :from-end t))) + (cond (last-newline-pos (- end last-newline-pos 1)) + (column (+ column (- end start))))))))))))) + (defmethod string-to-octets* ((format ,format-class) string start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end) (string string)) + (let ((octets (make-array (compute-number-of-octets format string start end) + :element-type 'octet)) + (j 0)) + (declare (fixnum j)) + (loop for i of-type fixnum from start below end do + (macrolet ((octet-writer (form) + `(progn + (setf (aref (the (array octet *) octets) j) ,form) + (incf j)))) + (symbol-macrolet ((char-getter (char string i))) + (progn ,@body)))) + octets)))) + +;; char-getter can be called more than once - no side effects +(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body) + "Utility macro which defines several encoding-related methods for +the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where +it is assumed that CR-FORMAT-CLASS is the same encoding as +FORMAT-CLASS but with CR line endings and similar for +CRLF-FORMAT-CLASS. BODY is a code template for the code to convert +one character to octets. BODY must contain a symbol CHAR-GETTER +representing the form which is used to obtain the character and a +forms like (OCTET-WRITE <thing>) to write the octet <thing>. The +CHAR-GETTER form might be called more than once." + (let ((body `((locally + (declare #.*fixnum-optimize-settings*) + ,@body)))) + `(progn + (defmethod char-to-octets ((format ,format-class) char writer) + (declare (character char) (function writer)) + (symbol-macrolet ((char-getter char)) + (macrolet ((octet-writer (form) + `(funcall writer ,form))) + ,@body))) + (define-sequence-writers (,format-class) ,@body) + (define-sequence-writers (,cr-format-class) + ,@(sublis `((char-getter . ,(with-unique-names (char) + `(let ((,char char-getter)) + (declare (character ,char)) + (if (char= ,char #\Newline) + #\Return + ,char))))) + body)) + (define-sequence-writers (,crlf-format-class) + ,(with-unique-names (char write-char) + `(flet ((,write-char (,char) + ,@(sublis `((char-getter . ,char)) body))) + (let ((,char char-getter)) + (declare (character ,char)) + (cond ((char= ,char #\Newline) + (,write-char #\Return) + (,write-char #\Newline)) + (t (,write-char ,char)))))))))) + +(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format) + (let ((octet (char-code char-getter))) (when (> octet 255) - (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char octet)) - (funcall writer octet))) + (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet)) + (octet-writer octet)))
-(defmethod char-to-octets ((format flexi-ascii-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((octet (char-code char))) +(define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format) + (let ((octet (char-code char-getter))) (when (> octet 127) - (signal-encoding-error format "~S (code ~A) is not an ASCII character." char octet)) - (funcall writer octet))) + (signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet)) + (octet-writer octet)))
-(defmethod char-to-octets ((format flexi-8-bit-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) +(define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format) (with-accessors ((encoding-hash external-format-encoding-hash)) format - (let ((octet (gethash (char-code char) encoding-hash))) + (let ((octet (gethash (char-code char-getter) encoding-hash))) (unless octet - (signal-encoding-error format "~S (code ~A) is not in this encoding." char octet)) - (funcall writer octet)))) + (signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet)) + (octet-writer octet))))
-(defmethod char-to-octets ((format flexi-utf-8-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((char-code (char-code char))) +(define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format) + (let ((char-code (char-code char-getter))) (tagbody (cond ((< char-code #x80) - (funcall writer char-code) + (octet-writer char-code) (go zero)) ((< char-code #x800) - (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code))) + (octet-writer (logior #b11000000 (ldb (byte 5 6) char-code))) (go one)) ((< char-code #x10000) - (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code))) + (octet-writer (logior #b11100000 (ldb (byte 4 12) char-code))) (go two)) - ((< char-code #x200000) - (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code))) - (go three)) - ((< char-code #x4000000) - (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code))) - (go four)) - (t (funcall writer (if (logbitp 30 char-code) #b11111101 #b11111100)))) - (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code))) - four - (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code))) - three - (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code))) + (t + (octet-writer (logior #b11110000 (ldb (byte 3 18) char-code))))) + (octet-writer (logior #b10000000 (ldb (byte 6 12) char-code))) two - (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code))) + (octet-writer (logior #b10000000 (ldb (byte 6 6) char-code))) one - (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code))) + (octet-writer (logior #b10000000 (ldb (byte 6 0) char-code))) zero)))
-(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) +(define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format) (flet ((write-word (word) - (funcall writer (ldb (byte 8 0) word)) - (funcall writer (ldb (byte 8 8) word)))) + (octet-writer (ldb (byte 8 0) word)) + (octet-writer (ldb (byte 8 8) word)))) (declare (inline write-word)) - (let ((char-code (char-code char))) + (let ((char-code (char-code char-getter))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) (write-word char-code)) @@ -110,14 +345,12 @@ (write-word (logior #xd800 (ldb (byte 10 10) char-code))) (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
-(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) +(define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format) (flet ((write-word (word) - (funcall writer (ldb (byte 8 8) word)) - (funcall writer (ldb (byte 8 0) word)))) + (octet-writer (ldb (byte 8 8) word)) + (octet-writer (ldb (byte 8 0) word)))) (declare (inline write-word)) - (let ((char-code (char-code char))) + (let ((char-code (char-code char-getter))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) (write-word char-code)) @@ -125,23 +358,19 @@ (write-word (logior #xd800 (ldb (byte 10 10) char-code))) (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
-(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((char-code (char-code char))) - (funcall writer (ldb (byte 8 0) char-code)) - (funcall writer (ldb (byte 8 8) char-code)) - (funcall writer (ldb (byte 8 16) char-code)) - (funcall writer (ldb (byte 8 24) char-code)))) - -(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((char-code (char-code char))) - (funcall writer (ldb (byte 8 24) char-code)) - (funcall writer (ldb (byte 8 16) char-code)) - (funcall writer (ldb (byte 8 8) char-code)) - (funcall writer (ldb (byte 8 0) char-code)))) +(define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format) + (let ((char-code (char-code char-getter))) + (octet-writer (ldb (byte 8 0) char-code)) + (octet-writer (ldb (byte 8 8) char-code)) + (octet-writer (ldb (byte 8 16) char-code)) + (octet-writer (ldb (byte 8 24) char-code)))) + +(define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format) + (let ((char-code (char-code char-getter))) + (octet-writer (ldb (byte 8 24) char-code)) + (octet-writer (ldb (byte 8 16) char-code)) + (octet-writer (ldb (byte 8 8) char-code)) + (octet-writer (ldb (byte 8 0) char-code))))
(defmethod char-to-octets ((format flexi-cr-mixin) char writer) (declare #.*fixnum-optimize-settings*)
Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Sat May 24 19:34:51 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.63 2008/05/23 14:43:09 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.65 2008/05/24 23:15:25 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -129,7 +129,7 @@ ;; needed for AllegroCL - grrr... (stream-write-char stream #\Newline))
-(defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key) +(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key) "An optimized version which uses a buffer underneath. The function can accepts characters as well as octets and it decides what to do based on the element type of the sequence (if possible) or on the @@ -141,7 +141,7 @@ (with-accessors ((column flexi-stream-column) (external-format flexi-stream-external-format) (stream flexi-stream-stream)) - stream + flexi-output-stream (when (>= start end) (return-from stream-write-sequence sequence)) (when (and (vectorp sequence) @@ -151,59 +151,8 @@ (setq column nil) (return-from stream-write-sequence (write-sequence sequence stream :start start :end end))) - (let* ((octet-seen-p nil) - (buffer-pos 0) - (factor (encoding-factor external-format)) - (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) - (buffer (make-octet-buffer buffer-size))) - (declare (fixnum buffer-pos buffer-size) - (boolean octet-seen-p) - (type (array octet *) buffer)) - (labels ((flush-buffer () - "Sends all octets in BUFFER to the underlying stream." - (write-sequence buffer stream :end buffer-pos) - (setq buffer-pos 0)) - (write-octet (octet) - "Adds one octet to the buffer and flush it if necessary." - (declare (octet octet)) - (when (>= buffer-pos buffer-size) - (flush-buffer)) - (setf (aref buffer buffer-pos) octet) - (incf buffer-pos)) - (write-character (char) - "Adds the octets representing the character CHAR to the buffer." - (char-to-octets external-format char #'write-octet)) - (write-object (object) - "Dispatches to WRITE-OCTET or WRITE-CHARACTER -depending on the type of OBJECT." - (etypecase object - (octet (setq octet-seen-p t) - (write-octet object)) - (character (write-character object))))) - (declare (dynamic-extent (function write-octet))) - (macrolet ((iterate (output-form) - "An unhygienic macro to implement the actual -iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one -sequence element and put its octet representation into the buffer." - `(loop for index of-type fixnum from start below end - do ,output-form - finally (when (plusp buffer-pos) - (flush-buffer))))) - (etypecase sequence - (string (iterate (write-character (char sequence index)))) - (array (iterate (write-object (aref sequence index)))) - (list (iterate (write-object (nth index sequence))))) - ;; update the column slot, setting it to NIL if we sent - ;; octets - (setq column - (cond (octet-seen-p nil) - (t (let ((last-newline-pos (position #\Newline sequence - :test #'char= - :start start - :end end - :from-end t))) - (cond (last-newline-pos (- end last-newline-pos 1)) - (column (+ column (- end start)))))))))))) + ;; otherwise hand over to the external format to do the work + (write-sequence* external-format flexi-output-stream sequence start end)) sequence)
(defmethod stream-write-string ((stream flexi-output-stream) string
Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sat May 24 19:34:51 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.22 2008/05/21 01:43:43 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.24 2008/05/24 23:15:25 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -36,56 +36,10 @@ octets corresponding to the external format designated by EXTERNAL-FORMAT." (declare #.*standard-optimize-settings*) - (declare (fixnum start end) (string string)) + (declare (string string)) (setq external-format (maybe-convert-external-format external-format)) - (let ((factor (encoding-factor external-format)) - (length (- end start))) - (declare (fixnum length)) - (etypecase factor - (integer - (let ((octets (make-array (* factor length) :element-type 'octet)) - (j 0)) - (declare (fixnum j)) - (flet ((writer (octet) - (declare (octet octet)) - (setf (aref (the (array octet *) octets) j) octet) - (incf j))) - (declare (dynamic-extent (function writer))) - (loop for i of-type fixnum from start below end do - (char-to-octets external-format - (char string i) - #'writer))) - octets)) - (double-float - ;; this is a bit clunky but hopefully a bit more efficient than - ;; using VECTOR-PUSH-EXTEND - (let* ((octets-length (ceiling (* factor length))) - (octets (make-array octets-length - :element-type 'octet - :fill-pointer t - :adjustable t)) - (i start) - (j 0)) - (declare (fixnum i j octets-length) - (double-float factor)) - (flet ((writer (octet) - (declare (octet octet)) - (when (>= j octets-length) - (setq factor (* factor 2.0d0)) - (incf octets-length (the fixnum (ceiling (* factor (- end i))))) - (adjust-array octets octets-length :fill-pointer t)) - (setf (aref (the (array octet *) octets) j) octet) - (incf j))) - (declare (dynamic-extent (function writer))) - (loop - (when (>= i end) - (return)) - (char-to-octets external-format - (char string i) - #'writer) - (incf i)) - (setf (fill-pointer octets) j) - octets)))))) + ;; the external format knows how to do it... + (string-to-octets* external-format string start end))
(defun octets-to-string (sequence &key (external-format :latin1)
Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sat May 24 19:34:51 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.31 2008/05/20 23:01:53 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.32 2008/05/21 17:51:42 edi Exp $
;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
@@ -329,10 +329,11 @@ (loop for i below (length seq1) always (eql (elt seq1 i) (elt seq2 i)))))
-(defun read-sequence-test (pathspec external-format) - "Several tests to confirm that READ-SEQUENCE behaves as expected." - (with-test ((format nil "READ-SEQUENCE tests with format ~S." - (flex::normalize-external-format external-format))) +(defun sequence-test (pathspec external-format) + "Several tests to confirm that READ-SEQUENCE and WRITE-SEQUENCE +behave as expected." + (with-test ((format nil "Sequence tests with format ~S and file ~A." + (flex::normalize-external-format external-format) pathspec)) (let* ((full-path (merge-pathnames pathspec *this-file*)) (file-string (file-as-string full-path external-format)) (string-length (length file-string)) @@ -397,7 +398,33 @@ (check (sequence-equal array (subseq file-string 25 (- string-length 25)))) (check (sequence-equal (loop repeat 25 collect (read-char in)) - (subseq file-string (- string-length 25))))))))) + (subseq file-string (- string-length 25)))))) + (let ((path-out (ensure-directories-exist (merge-pathnames pathspec *tmp-dir*)))) + (with-open-file (out path-out + :direction :output + :if-exists :supersede + :element-type 'octet) + (let ((out (make-flexi-stream out :external-format external-format))) + (write-sequence octets out))) + (check (file-equal full-path path-out)) + (with-open-file (out path-out + :direction :output + :if-exists :supersede + :element-type 'octet) + (let ((out (make-flexi-stream out :external-format external-format))) + (write-sequence file-string out))) + (check (file-equal full-path path-out)) + (with-open-file (out path-out + :direction :output + :if-exists :supersede + :element-type 'octet) + (let ((out (make-flexi-stream out :external-format external-format))) + (write-sequence file-string out :end 100) + (write-sequence octets out + :start (length (string-to-octets file-string + :external-format external-format + :end 100))))) + (check (file-equal full-path path-out))))))
(defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE @@ -544,7 +571,7 @@ nconc (create-test-combinations file-name symbols t)))) (incf no-tests (length read-sequence-test-args-list)) (dolist (args read-sequence-test-args-list) - (apply 'read-sequence-test args))) + (apply 'sequence-test args))) (incf no-tests) (error-handling-test) (incf no-tests)