Author: eweitz Date: Sat May 17 18:31:08 2008 New Revision: 23
Added: branches/edi/conditions.lisp (contents, props changed) branches/edi/decode.lisp (contents, props changed) branches/edi/encode.lisp (contents, props changed) Modified: branches/edi/ascii.lisp branches/edi/code-pages.lisp branches/edi/external-format.lisp branches/edi/flexi-streams.asd branches/edi/in-memory.lisp branches/edi/input.lisp branches/edi/iso-8859.lisp branches/edi/lw-binary-stream.lisp branches/edi/output.lisp branches/edi/packages.lisp branches/edi/specials.lisp branches/edi/stream.lisp branches/edi/strings.lisp branches/edi/test/packages.lisp branches/edi/test/test.lisp branches/edi/util.lisp Log: Start of reorg - this time as a diff from trunk
Modified: branches/edi/ascii.lisp ============================================================================== --- branches/edi/ascii.lisp (original) +++ branches/edi/ascii.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.7 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.8 2008/05/17 13:50:15 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions
Modified: branches/edi/code-pages.lisp ============================================================================== --- branches/edi/code-pages.lisp (original) +++ branches/edi/code-pages.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.5 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.6 2008/05/17 13:50:15 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions
Added: branches/edi/conditions.lisp ============================================================================== --- (empty file) +++ branches/edi/conditions.lisp Sat May 17 18:31:08 2008 @@ -0,0 +1,84 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.3 2008/05/17 15:56:16 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(define-condition flexi-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +flexi streams.")) + +(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) + () + (:documentation "Like FLEXI-STREAM-ERROR but with formatting +capabilities.")) + +(define-condition flexi-stream-element-type-error (flexi-stream-error) + ((element-type :initarg :element-type + :reader flexi-stream-element-type-error-element-type)) + (:report (lambda (condition stream) + (format stream "Element type ~S not allowed." + (flexi-stream-element-type-error-element-type condition)))) + (:documentation "Errors of this type are signalled if the flexi +stream has a wrong element type.")) + +(define-condition flexi-stream-encoding-error (flexi-stream-simple-error) + () + (:documentation "Errors of this type are signalled if there is an +encoding problem.")) + +(define-condition flexi-stream-position-spec-error (flexi-stream-simple-error) + ((position-spec :initarg :position-spec + :reader flexi-stream-position-spec-error-position-spec)) + (:documentation "Errors of this type are signalled if an +erroneous position spec is used in conjunction with +FILE-POSITION.")) + +;; TODO: stream might not be a stream... +(defun signal-encoding-error (flexi-stream format-control &rest format-args) + "Convenience function similar to ERROR to signal conditions of type +FLEXI-STREAM-ENCODING-ERROR." + (error 'flexi-stream-encoding-error + :format-control format-control + :format-arguments format-args + :stream flexi-stream)) + +(define-condition in-memory-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +IN-MEMORY streams.")) + +(define-condition in-memory-stream-closed-error (in-memory-stream-error) + () + (:report (lambda (condition stream) + (format stream "~S is closed." + (stream-error-stream condition)))) + (:documentation "An error that is signalled when someone is trying +to read from or write to a closed IN-MEMORY stream.")) +
Added: branches/edi/decode.lisp ============================================================================== --- (empty file) +++ branches/edi/decode.lisp Sat May 17 18:31:08 2008 @@ -0,0 +1,151 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.2 2008/05/17 16:35:58 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defgeneric char-to-octets (format char writer stream) + (:documentation "Converts the character CHAR to sequence of octets +and sends this sequence to SINK. STREAM will always be a flexi stream +which is used to determine how the character should be converted. +This function does all the work for STREAM-WRITE-CHAR in which case +SINK is the same as STREAM. It is also used in the implementation of +STREAM-WRITE-SEQUENCE below.")) + +(defmethod char-to-octets ((format flexi-latin-1-format) char writer stream) + (declare (optimize speed)) + (let ((octet (char-code char))) + (when (> octet 255) + (signal-encoding-error stream "~S is not a LATIN-1 character." char)) + (funcall writer octet)) + char) + +(defmethod char-to-octets ((format flexi-ascii-format) char writer stream) + (declare (optimize speed)) + (let ((octet (char-code char))) + (when (> octet 127) + (signal-encoding-error stream "~S is not an ASCII character." char)) + (funcall writer octet)) + char) + +(defmethod char-to-octets ((format flexi-8-bit-format) char writer stream) + (declare (optimize speed)) + (with-accessors ((encoding-hash external-format-encoding-hash)) + format + (let ((octet (gethash (char-code char) encoding-hash))) + (unless octet + (signal-encoding-error stream "~S is not in this encoding." char)) + (funcall writer octet)) + char)) + +(defmethod char-to-octets ((format flexi-utf-8-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (let ((char-code (char-code char))) + (tagbody + (cond ((< char-code #x80) + (funcall writer char-code) + (go zero)) + ((< char-code #x800) + (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code))) + (go one)) + ((< char-code #x10000) + (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code))) + (go two)) + ((< char-code #x200000) + (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code))) + (go three)) + ((< char-code #x4000000) + (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code))) + (go four)) + (t (funcall writer (logior #b11111100 (ldb (byte 1 30) char-code))))) + (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code))) + four + (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code))) + three + (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code))) + two + (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code))) + one + (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code))) + zero)) + char) + +(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (flet ((write-word (word) + (funcall writer (ldb (byte 8 0) word)) + (funcall writer (ldb (byte 8 8) word)))) + (let ((char-code (char-code char))) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) + char) + +(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (flet ((write-word (word) + (funcall writer (ldb (byte 8 8) word)) + (funcall writer (ldb (byte 8 0) word)))) + (declare (inline write-word) (dynamic-extent (function write-word))) + (let ((char-code (char-code char))) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) + char) + +(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (loop with char-code = (char-code char) + for position in '(0 8 16 24) do + (funcall writer (ldb (byte 8 position) char-code))) + char) + +(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (loop with char-code = (char-code char) + for position in '(24 16 8 0) do + (funcall writer (ldb (byte 8 position) char-code))) + char) + +(defmethod char-to-octets ((format flexi-cr-mixin) char writer stream) + "The `base' method for all formats which need end-of-line +conversion. Uses CALL-NEXT-METHOD to do the actual work of sending +one or more characters to SINK." + (declare (optimize speed)) + (case char + (#\Newline + (case (external-format-eol-style format) + (:cr (call-next-method format #\Return writer stream)) + (:crlf (call-next-method format #\Return writer stream) + (call-next-method format #\Linefeed writer stream)))) + (otherwise (call-next-method))) + char)
Added: branches/edi/encode.lisp ============================================================================== --- (empty file) +++ branches/edi/encode.lisp Sat May 17 18:31:08 2008 @@ -0,0 +1,237 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.2 2008/05/17 16:35:58 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defun recover-from-encoding-error (stream format-control &rest format-args) + "Helper function used by the STREAM-READ-CHAR methods below to deal +with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and +returns its character code in this case. Otherwise signals a +FLEXI-STREAM-ENCODING-ERROR as determined by the arguments to this +function and provides a corresponding USE-VALUE restart." + (when *substitution-char* + (return-from recover-from-encoding-error (char-code *substitution-char*))) + (restart-case + (apply #'signal-encoding-error stream format-control format-args) + (use-value (char) + :report "Specify a character to be used instead." + :interactive (lambda () + (loop + (format *query-io* "Type a character: ") + (let ((line (read-line *query-io*))) + (when (= 1 (length line)) + (return (list (char line 0))))))) + (char-code char)))) + +(defmethod octets-to-char-code ((format flexi-latin-1-format) reader unreader stream) + (declare (ignore unreader stream)) + (or (funcall reader) :eof)) + +(defmethod octets-to-char-code ((format flexi-ascii-format) reader unreader stream) + (declare (ignore unreader)) + (let ((octet (or (funcall reader) + (return-from octets-to-char-code :eof)))) + (declare (type octet octet)) + (if (> octet 127) + (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + octet))) + +(defmethod octets-to-char-code ((format flexi-8-bit-format) reader unreader stream) + (declare (ignore unreader)) + (with-accessors ((decoding-table external-format-decoding-table)) + format + (let* ((octet (or (funcall reader) + (return-from octets-to-char-code :eof))) + (char-code (aref (the (simple-array * *) decoding-table) octet))) + (declare (type octet octet)) + (if (or (null char-code) + (= char-code 65533)) + (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + char-code)))) + +(defmethod octets-to-char-code ((format flexi-utf-8-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-8 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (let ((octet (read-next-byte))) + (declare (type octet octet)) + (multiple-value-bind (start count) + (cond ((zerop (logand octet #b10000000)) + (values octet 0)) + ((= #b11000000 (logand octet #b11100000)) + (values (logand octet #b00011111) 1)) + ((= #b11100000 (logand octet #b11110000)) + (values (logand octet #b00001111) 2)) + ((= #b11110000 (logand octet #b11111000)) + (values (logand octet #b00000111) 3)) + ((= #b11111000 (logand octet #b11111100)) + (values (logand octet #b00000011) 4)) + ((= #b11111100 (logand octet #b11111110)) + (values (logand octet #b00000001) 5)) + (t (return-from octets-to-char-code + (recover-from-encoding-error stream + "Unexpected value #x~X at start of UTF-8 sequence." + octet)))) + ;; note that we currently don't check for "overlong" + ;; sequences or other illegal values + (loop for result of-type (unsigned-byte 32) + = start then (+ (ash result 6) + (logand octet #b111111)) + repeat count + for octet of-type octet = (read-next-byte) + unless (= #b10000000 (logand octet #b11000000)) + do (return-from octets-to-char-code + (recover-from-encoding-error stream + "Unexpected value #x~X in UTF-8 sequence." octet)) + finally (return result))))))) + +(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (labels ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-16 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t))) + (read-next-word () + (+ (the octet (read-next-byte)) + (ash (the octet (read-next-byte)) 8)))) + (declare (inline read-next-byte read-next-word) + (dynamic-extent (function read-next-byte) (function read-next-word))) + (let ((word (read-next-word))) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word)))))) + +(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (labels ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-16 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t))) + (read-next-word () + (+ (ash (the octet (read-next-byte)) 8) + (the octet (read-next-byte))))) + (let ((word (read-next-word))) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word)))))) + +(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader unreader stream) + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-32 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (loop for count from 0 to 24 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count))))) + +(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-32 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (loop for count from 24 downto 0 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count))))) + +(defmethod octets-to-char-code ((format flexi-cr-mixin) reader unreader stream) + "The `base' method for all streams which need end-of-line +conversion. Uses CALL-NEXT-METHOD to do the actual work of reading +one or more encoded characters." + (declare (optimize speed)) + (let ((char-code (call-next-method))) + (when (eq char-code :eof) + (return-from octets-to-char-code :eof)) + (with-accessors ((eol-style external-format-eol-style)) + format + (cond ((= char-code #.(char-code #\Return)) + (case eol-style + (:cr #.(char-code #\Newline)) + ;; in the case :CRLF we have to look ahead one character + (:crlf (let ((next-char-code (call-next-method))) + (case next-char-code + (#.(char-code #\Linefeed) + #.(char-code #\Newline)) + (:eof char-code) + ;; if the character we peeked at wasn't a + ;; linefeed character we unread its constituents + (otherwise + (funcall unreader (code-char next-char-code)) + char-code)))))) + (t char-code))))) +
Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.11 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.15 2008/05/17 16:38:24 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -58,6 +58,154 @@ "Defines a way to reconstruct external formats. Needed for OpenMCL." (make-load-form-saving-slots thing :environment environment))
+(defclass flexi-cr-mixin () + () + (:documentation "A mixin for external-formats which need +end-of-line conversion, i.e. for those where the end-of-line +designator is /not/ the single character #\Linefeed.")) + +(defclass flexi-8-bit-format (external-format) + ((encoding-hash :accessor external-format-encoding-hash) + (decoding-table :accessor external-format-decoding-table)) + (:documentation "The class for all flexi streams which use an 8-bit +encoding and thus need additional slots for the encoding/decoding +tables.")) + +(defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format) + () + (:documentation "The class for all external formats which use an +8-bit encoding /and/ need end-of-line conversion.")) + +(defclass flexi-ascii-format (flexi-8-bit-format) + () + (:documentation "Special class for external formats which use the +US-ASCCI encoding.")) + +(defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format) + () + (:documentation "Special class for external formats which use the +US-ASCCI encoding /and/ need end-of-line conversion.")) + +(defclass flexi-latin-1-format (flexi-8-bit-format) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding.")) + +(defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding /and/ need end-of-line conversion.")) + +(defclass flexi-utf-32-le-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-32-be-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with big-endian byte ordering /and/ need end-of-line +conversion.")) + +(defclass flexi-utf-16-le-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-16-be-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering /and/ need end-of-line +conversion.")) + +(defclass flexi-utf-8-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding.")) + +(defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding /and/ need end-of-line conversion.")) + +(defmethod initialize-instance :after ((external-format flexi-8-bit-format) &rest initargs) + "Sets the fixed encoding/decoding tables for this particular +external format." + (declare (ignore initargs)) + (with-accessors ((encoding-hash external-format-encoding-hash) + (decoding-table flexi-stream-decoding-table) + (name external-format-name) + (id external-format-id)) + external-format + (multiple-value-setq (encoding-hash decoding-table) + (cond ((ascii-name-p name) + (values +ascii-hash+ +ascii-table+)) + ((koi8-r-name-p name) + (values +koi8-r-hash+ +koi8-r-table+)) + ((iso-8859-name-p name) + (values (cdr (assoc name +iso-8859-hashes+ :test #'eq)) + (cdr (assoc name +iso-8859-tables+ :test #'eq)))) + ((code-page-name-p name) + (values (cdr (assoc id +code-page-hashes+)) + (cdr (assoc id +code-page-tables+)))))))) + +(defun external-format-class-name (real-name eol-style little-endian) + (let ((crp (not (eq eol-style :lf)))) + (cond ((ascii-name-p real-name) + (if crp + 'flexi-cr-ascii-format + 'flexi-ascii-format)) + ((eq real-name :iso-8859-1) + (if crp + 'flexi-cr-latin-1-format + 'flexi-latin-1-format)) + ((or (koi8-r-name-p real-name) + (iso-8859-name-p real-name) + (code-page-name-p real-name)) + (if crp + 'flexi-cr-8-bit-format + 'flexi-8-bit-format)) + (t (case real-name + (:utf-8 (if crp + 'flexi-cr-utf-8-format + 'flexi-utf-8-format)) + (:utf-16 (if crp + (if little-endian + 'flexi-cr-utf-16-le-format + 'flexi-cr-utf-16-be-format) + (if little-endian + 'flexi-utf-16-le-format + 'flexi-utf-16-be-format))) + (:utf-32 (if crp + (if little-endian + 'flexi-cr-utf-32-le-format + 'flexi-cr-utf-32-be-format) + (if little-endian + 'flexi-utf-32-le-format + 'flexi-utf-32-be-format)))))))) + (defun make-external-format% (name &key (little-endian *default-little-endian*) id eol-style) "Used internally by MAKE-EXTERNAL-FORMAT." @@ -74,7 +222,7 @@ :eol-style (or eol-style :crlf))) (t (list :eol-style (or eol-style *default-eol-style*) :little-endian little-endian))))) - (apply #'make-instance 'external-format + (apply #'make-instance (external-format-class-name real-name eol-style little-endian) :name real-name initargs)))
Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Sat May 17 18:31:08 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.58 2007/12/29 23:15:26 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.60 2008/05/17 15:56:16 edi Exp $
;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved.
@@ -45,7 +45,10 @@ (:file "specials") (:file "util") (:file "external-format") + (:file "encode") + (:file "decode") (:file "in-memory") + (:file "conditions") (:file "stream") #+:lispworks (:file "lw-binary-stream") (:file "output")
Modified: branches/edi/in-memory.lisp ============================================================================== --- branches/edi/in-memory.lisp (original) +++ branches/edi/in-memory.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.26 2007/12/29 21:17:05 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.29 2008/05/17 16:35:58 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -104,19 +104,6 @@ (:documentation "A binary output stream that writes its data to an associated vector."))
-(define-condition in-memory-stream-error (stream-error) - () - (:documentation "Superclass for all errors related to -IN-MEMORY streams.")) - -(define-condition in-memory-stream-closed-error (in-memory-stream-error) - () - (:report (lambda (condition stream) - (format stream "~S is closed." - (stream-error-stream condition)))) - (:documentation "An error that is signalled when someone is trying -to read from or write to a closed IN-MEMORY stream.")) - #+:cmu (defmethod open-stream-p ((stream in-memory-stream)) "Returns a true value if STREAM is open. See ANSI standard." @@ -382,14 +369,3 @@ ,@body (get-output-stream-sequence ,var :as-list ,as-list)) (when ,var (close ,var))))) - -(declaim (inline translate-char)) -(defun translate-char (char-code external-format) - "Returns a list of octets which correspond to the -representation of the character with character code CHAR-CODE -when sent to a flexi stream with external format EXTERNAL-FORMAT. -Used internally by UNREAD-CHAR%. See also STRING-TO-OCTETS." - (declare (optimize speed)) - (with-output-to-sequence (list :as-list t) - (let ((stream (make-flexi-stream list :external-format external-format))) - (write-char (code-char char-code) stream)))) \ No newline at end of file
Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.51 2007/12/29 22:58:43 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.57 2008/05/17 16:44:53 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -140,344 +140,47 @@ (setq last-octet octet) (or octet :eof))))
-(defgeneric unread-char% (char-code flexi-input-stream) - (:documentation "Used internally to put a character denoted by the -character code CHAR-CODE which was already read back on the stream. -Uses the OCTET-STACK slot and decrements the POSITION slot -accordingly.")) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-input-stream)) - "The default method which is un-optimized and uses TRANSLATE-CHAR to -figure out which octets to put on the octet stack." - (declare (optimize speed) (inline translate-char)) +(defun unread-char% (char flexi-input-stream) + "Used internally to put a character CHAR which was already read back +on the stream. Uses the OCTET-STACK slot and decrements the POSITION +slot accordingly." (with-accessors ((position flexi-stream-position) (octet-stack flexi-stream-octet-stack) (external-format flexi-stream-external-format)) flexi-input-stream - (declare (integer position)) - (let ((octets-read (translate-char char-code external-format))) - (decf position (length octets-read)) - (setq octet-stack (append octets-read octet-stack))))) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-latin-1-input-stream)) - "For ISO-8859-1 we can simply put the character code itself on the -octet stack." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack)) - flexi-input-stream - (declare (integer position)) - (decf position) - (push char-code octet-stack))) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-ascii-input-stream)) - "For ASCII we can simply put the character code itself on the octet -stack." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack)) - flexi-input-stream - (declare (integer position)) - (decf position) - (push char-code octet-stack))) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-8-bit-input-stream)) - "For 8-bit encodings we just have to put one octet on the octet -stack which we can look up in the encoding hash." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack) - (encoding-hash flexi-stream-encoding-hash)) - flexi-input-stream - (declare (integer position)) - (decf position) - (push (gethash char-code encoding-hash) octet-stack))) - -(defmethod unread-char% ((char-code (eql #.(char-code #\Newline))) - (flexi-input-stream flexi-cr-8-bit-input-stream)) - "A kind of `safety net' for the optimized 8-bit versions of -UNREAD-CHAR% which checks for the single case where more than one -octet has to be put on the octet stack." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack) - (external-format flexi-stream-external-format)) - flexi-input-stream - (declare (integer position)) - ;; note that below we use the knowledge that in all 8-bit encodings - ;; #\Return and #\Linefeed are mapped to the same octets - (case (external-format-eol-style external-format) - (:crlf - (decf position 2) - (push #.(char-code #\Linefeed) octet-stack) - (push #.(char-code #\Return) octet-stack)) - (otherwise - (decf position) - (push #.(char-code #\Return) octet-stack))))) - -#+:lispworks -(defmethod unread-char% ((char-code (eql #.(char-code #\Newline))) - (flexi-input-stream flexi-binary-cr-8-bit-input-stream)) - "A kind of `safety net' for the optimized 8-bit versions of -UNREAD-CHAR% which checks for the single case where more than one -octet has to be put on the octet stack. - -This method (identical to the one defined directly above) exists only -for LispWorks' "binary" streams and must be there due to the -slightly clunky class hierarchy." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack) - (external-format flexi-stream-external-format)) - flexi-input-stream - (declare (integer position)) - ;; note that below we use the knowledge that in all 8-bit encodings - ;; #\Return and #\Linefeed are mapped to the same octets - (case (external-format-eol-style external-format) - (:crlf - (decf position 2) - (push #.(char-code #\Linefeed) octet-stack) - (push #.(char-code #\Return) octet-stack)) - (otherwise - (decf position) - (push #.(char-code #\Return) octet-stack))))) - -(defmacro define-char-reader ((stream-var stream-class) &body body) - "Helper macro to define methods for STREAM-READ-CHAR. Defines a -method for the class STREAM-CLASS using the variable STREAM-VAR and -the code body BODY wrapped with some standard code common to all -methods defined here. The return value of BODY is a character code. -In case of encoding problems, BODY must return the value returned by -(RECOVER-FROM-ENCODING-ERROR ...)." - (with-unique-names (char-code body-fn) - `(defmethod stream-read-char ((,stream-var ,stream-class)) - "This method was generated with the DEFINE-CHAR-READER macro." - (declare (optimize speed)) - ;; note that we do nothing for the :LF EOL style because we - ;; assume that #\Newline is the same as #\Linefeed in all - ;; Lisps which will use this library - (with-accessors ((last-octet flexi-stream-last-octet) - (last-char-code flexi-stream-last-char-code)) - ,stream-var - ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after - ;; this operation - (setq last-octet nil) - (let ((,char-code - (flet ((,body-fn () ,@body)) - (declare (inline ,body-fn) (dynamic-extent (function ,body-fn))) - (,body-fn)))) - ;; remember this character and the current external format - ;; for UNREAD-CHAR - (setq last-char-code ,char-code) - (or (code-char ,char-code) ,char-code)))))) - -(defun recover-from-encoding-error (flexi-stream format-control &rest format-args) - "Helper function used by the STREAM-READ-CHAR methods below to deal -with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and -returns its character code in this case. Otherwise signals a -FLEXI-STREAM-ENCODING-ERROR as determined by the arguments to this -function and provides a corresponding USE-VALUE restart." - (when *substitution-char* - (return-from recover-from-encoding-error (char-code *substitution-char*))) - (restart-case - (apply #'signal-encoding-error flexi-stream format-control format-args) - (use-value (char) - :report "Specify a character to be used instead." - :interactive (lambda () - (loop - (format *query-io* "Type a character: ") - (let ((line (read-line *query-io*))) - (when (= 1 (length line)) - (return (list (char line 0))))))) - (char-code char)))) - -(define-char-reader (stream flexi-latin-1-input-stream) - (or (read-byte* stream) - (return-from stream-read-char :eof))) - -(define-char-reader (stream flexi-ascii-input-stream) - (let ((octet (or (read-byte* stream) - (return-from stream-read-char :eof)))) - (declare (type octet octet)) - (if (> octet 127) - (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) - octet))) - -(define-char-reader (stream flexi-8-bit-input-stream) - (with-accessors ((encoding-table flexi-stream-encoding-table)) + (let ((counter 0) octets-reversed) + (declare (integer position) + (fixnum counter)) + (char-to-octets external-format + char + (lambda (octet) + (incf counter) + (push octet octets-reversed)) + nil) + (decf position counter) + (setq octet-stack (nreconc octets-reversed octet-stack))))) + +(defmethod stream-read-char ((stream flexi-input-stream)) + (declare (optimize speed)) + ;; note that we do nothing for the :LF EOL style because we assume + ;; that #\Newline is the same as #\Linefeed in all Lisps which will + ;; use this library + (with-accessors ((external-format flexi-stream-external-format) + (last-octet flexi-stream-last-octet) + (last-char-code flexi-stream-last-char-code)) stream - (let* ((octet (or (read-byte* stream) - (return-from stream-read-char :eof))) - (char-code (aref (the (simple-array * *) encoding-table) octet))) - (declare (type octet octet)) - (if (or (null char-code) - (= char-code 65533)) - (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) - char-code)))) - -(define-char-reader (stream flexi-utf-8-input-stream) - (block body - (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-8 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (let ((octet (read-next-byte))) - (declare (type octet octet)) - (multiple-value-bind (start count) - (cond ((zerop (logand octet #b10000000)) - (values octet 0)) - ((= #b11000000 (logand octet #b11100000)) - (values (logand octet #b00011111) 1)) - ((= #b11100000 (logand octet #b11110000)) - (values (logand octet #b00001111) 2)) - ((= #b11110000 (logand octet #b11111000)) - (values (logand octet #b00000111) 3)) - ((= #b11111000 (logand octet #b11111100)) - (values (logand octet #b00000011) 4)) - ((= #b11111100 (logand octet #b11111110)) - (values (logand octet #b00000001) 5)) - (t (return-from body - (recover-from-encoding-error stream - "Unexpected value #x~X at start of UTF-8 sequence." - octet)))) - ;; note that we currently don't check for "overlong" - ;; sequences or other illegal values - (loop for result of-type (unsigned-byte 32) - = start then (+ (ash result 6) - (logand octet #b111111)) - repeat count - for octet of-type octet = (read-next-byte) - unless (= #b10000000 (logand octet #b11000000)) - do (return-from body - (recover-from-encoding-error stream - "Unexpected value #x~X in UTF-8 sequence." octet)) - finally (return result)))))))) - -(define-char-reader (stream flexi-utf-16-le-input-stream) - (block body - (let (first-octet-seen) - (labels ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-16 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t))) - (read-next-word () - (+ (the octet (read-next-byte)) - (ash (the octet (read-next-byte)) 8)))) - (declare (inline read-next-byte read-next-word) - (dynamic-extent (function read-next-byte) (function read-next-word))) - (let ((word (read-next-word))) - (cond ((<= #xd800 word #xdfff) - (let ((next-word (read-next-word))) - (unless (<= #xdc00 next-word #xdfff) - (return-from body - (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." - next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) - #x10000))) - (t word))))))) - -(define-char-reader (stream flexi-utf-16-be-input-stream) - (block body - (let (first-octet-seen) - (labels ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-16 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t))) - (read-next-word () - (+ (ash (the octet (read-next-byte)) 8) - (the octet (read-next-byte))))) - (let ((word (read-next-word))) - (cond ((<= #xd800 word #xdfff) - (let ((next-word (read-next-word))) - (unless (<= #xdc00 next-word #xdfff) - (return-from body - (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." - next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) - #x10000))) - (t word))))))) - -(define-char-reader (stream flexi-utf-32-le-input-stream) - (block body - (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-32 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (loop for count from 0 to 24 by 8 - for octet of-type octet = (read-next-byte) - sum (ash octet count)))))) - -(define-char-reader (stream flexi-utf-32-be-input-stream) - (block body - (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-32 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (loop for count from 24 downto 0 by 8 - for octet of-type octet = (read-next-byte) - sum (ash octet count)))))) - -(defmethod stream-read-char ((stream flexi-cr-mixin)) - "The `base' method for all streams which need end-of-line -conversion. Uses CALL-NEXT-METHOD to do the actual work of -reading one or more characters from the stream." - (declare (optimize speed)) - (let ((char (call-next-method))) - (when (eq char :eof) - (return-from stream-read-char :eof)) - (with-accessors ((external-format flexi-stream-external-format) - (last-char-code flexi-stream-last-char-code)) - stream - (when (eql char #\Return) - (case (external-format-eol-style external-format) - (:cr (setq char #\Newline - last-char-code #.(char-code #\Newline))) - ;; in the case :CRLF we have to look ahead one character - (:crlf (let ((next-char (call-next-method))) - (case next-char - (#\Linefeed - (setq char #\Newline - last-char-code #.(char-code #\Newline))) - (:eof) - ;; if the character we peeked at wasn't a - ;; linefeed character we push its - ;; constituents back onto our internal - ;; octet stack - (otherwise (unread-char% (char-code next-char) stream))))))) - char))) + ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after + ;; this operation + (setq last-octet nil) + (let ((char-code (octets-to-char-code external-format + (lambda () + (read-byte* stream)) + (lambda (char) + (unread-char% char stream)) + stream))) + ;; remember this character and its char code for UNREAD-CHAR + (setq last-char-code char-code) + (or (code-char char-code) char-code))))
(defmethod stream-read-char-no-hang ((stream flexi-input-stream)) "Reads one character if the underlying stream has at least one @@ -540,7 +243,7 @@ (error 'flexi-stream-simple-error :format-control "Last character read (~S) was different from ~S." :format-arguments (list (code-char last-char-code) char))) - (unread-char% last-char-code stream) + (unread-char% char stream) (setq last-char-code nil) nil))
Modified: branches/edi/iso-8859.lisp ============================================================================== --- branches/edi/iso-8859.lisp (original) +++ branches/edi/iso-8859.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.5 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.6 2008/05/17 13:50:16 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions
Modified: branches/edi/lw-binary-stream.lisp ============================================================================== --- branches/edi/lw-binary-stream.lisp (original) +++ branches/edi/lw-binary-stream.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.10 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.13 2008/05/17 14:21:20 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -286,131 +286,7 @@ (defclass flexi-binary-cr-utf-8-io-stream (flexi-cr-mixin flexi-binary-utf-8-io-stream) () (:documentation "Like FLEXI-CR-UTF-8-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defmethod set-class ((stream flexi-binary-input-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-binary-cr-ascii-input-stream - 'flexi-binary-ascii-input-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-binary-cr-latin-1-input-stream - 'flexi-binary-latin-1-input-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-binary-cr-8-bit-input-stream - 'flexi-binary-8-bit-input-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-binary-cr-utf-8-input-stream - 'flexi-binary-utf-8-input-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-16-le-input-stream - 'flexi-binary-cr-utf-16-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-16-le-input-stream - 'flexi-binary-utf-16-be-input-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-32-le-input-stream - 'flexi-binary-cr-utf-32-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-32-le-input-stream - 'flexi-binary-utf-32-be-input-stream)))))))))) - -(defmethod set-class ((stream flexi-binary-output-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-binary-cr-ascii-output-stream - 'flexi-binary-ascii-output-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-binary-cr-latin-1-output-stream - 'flexi-binary-latin-1-output-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-binary-cr-8-bit-output-stream - 'flexi-binary-8-bit-output-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-binary-cr-utf-8-output-stream - 'flexi-binary-utf-8-output-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-16-le-output-stream - 'flexi-binary-cr-utf-16-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-16-le-output-stream - 'flexi-binary-utf-16-be-output-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-32-le-output-stream - 'flexi-binary-cr-utf-32-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-32-le-output-stream - 'flexi-binary-utf-32-be-output-stream)))))))))) - -(defmethod set-class ((stream flexi-binary-io-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-binary-cr-ascii-io-stream - 'flexi-binary-ascii-io-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-binary-cr-latin-1-io-stream - 'flexi-binary-latin-1-io-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-binary-cr-8-bit-io-stream - 'flexi-binary-8-bit-io-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-binary-cr-utf-8-io-stream - 'flexi-binary-utf-8-io-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-16-le-io-stream - 'flexi-binary-cr-utf-16-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-16-le-io-stream - 'flexi-binary-utf-16-be-io-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-32-le-io-stream - 'flexi-binary-cr-utf-32-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-32-le-io-stream - 'flexi-binary-utf-32-be-io-stream)))))))))) - +optimized for LispWorks binary streams."))
(defmethod initialize-instance :after ((flexi-stream flexi-output-stream) &rest initargs) "Might change the class of FLEXI-STREAM for optimization purposes. @@ -423,8 +299,7 @@ (change-class flexi-stream (typecase flexi-stream (flexi-io-stream 'flexi-binary-io-stream) - (otherwise 'flexi-binary-output-stream))) - (set-class flexi-stream)))) + (otherwise 'flexi-binary-output-stream))))))
(defmethod initialize-instance :after ((flexi-stream flexi-input-stream) &rest initargs) "Might change the class of FLEXI-STREAM for optimization purposes. @@ -437,5 +312,4 @@ (change-class flexi-stream (typecase flexi-stream (flexi-io-stream 'flexi-binary-io-stream) - (otherwise 'flexi-binary-input-stream))) - (set-class flexi-stream)))) + (otherwise 'flexi-binary-input-stream))))))
Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.44 2007/12/29 22:23:23 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.47 2008/05/17 16:40:33 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -62,137 +62,15 @@ sink (write-byte byte stream)))
-(defmethod write-byte* (byte (sink array)) - (declare (optimize speed)) - (vector-push byte sink)) - -(defgeneric char-to-octets (stream char sink) - (:documentation "Converts the character CHAR to sequence of octets -and sends this sequence to SINK. STREAM will always be a flexi stream -which is used to determine how the character should be converted. -This function does all the work for STREAM-WRITE-CHAR in which case -SINK is the same as STREAM. It is also used in the implementation of -STREAM-WRITE-SEQUENCE below.")) - (defmethod stream-write-char ((stream flexi-output-stream) char) (declare (optimize speed)) - (char-to-octets stream char stream)) - -(defmethod char-to-octets ((stream flexi-latin-1-output-stream) char sink) - (declare (optimize speed)) - (let ((octet (char-code char))) - (when (> octet 255) - (signal-encoding-error stream "~S is not a LATIN-1 character." char)) - (write-byte* octet sink)) - char) - -(defmethod char-to-octets ((stream flexi-ascii-output-stream) char sink) - (declare (optimize speed)) - (let ((octet (char-code char))) - (when (> octet 127) - (signal-encoding-error stream "~S is not an ASCII character." char)) - (write-byte* octet sink)) - char) - -(defmethod char-to-octets ((stream flexi-8-bit-output-stream) char sink) - (declare (optimize speed)) - (with-accessors ((encoding-hash flexi-stream-encoding-hash)) - stream - (let ((octet (gethash (char-code char) encoding-hash))) - (unless octet - (signal-encoding-error stream "~S is not in this encoding." char)) - (write-byte* octet sink)) - char)) - -(defmethod char-to-octets ((stream flexi-utf-8-output-stream) char sink) - (declare (optimize speed)) - (let ((char-code (char-code char))) - (tagbody - (cond ((< char-code #x80) - (write-byte* char-code sink) - (go zero)) - ((< char-code #x800) - (write-byte* (logior #b11000000 (ldb (byte 5 6) char-code)) sink) - (go one)) - ((< char-code #x10000) - (write-byte* (logior #b11100000 (ldb (byte 4 12) char-code)) sink) - (go two)) - ((< char-code #x200000) - (write-byte* (logior #b11110000 (ldb (byte 3 18) char-code)) sink) - (go three)) - ((< char-code #x4000000) - (write-byte* (logior #b11111000 (ldb (byte 2 24) char-code)) sink) - (go four)) - (t (write-byte* (logior #b11111100 (ldb (byte 1 30) char-code)) sink))) - (write-byte* (logior #b10000000 (ldb (byte 6 24) char-code)) sink) - four - (write-byte* (logior #b10000000 (ldb (byte 6 18) char-code)) sink) - three - (write-byte* (logior #b10000000 (ldb (byte 6 12) char-code)) sink) - two - (write-byte* (logior #b10000000 (ldb (byte 6 6) char-code)) sink) - one - (write-byte* (logior #b10000000 (ldb (byte 6 0) char-code)) sink) - zero)) - char) - -(defmethod char-to-octets ((stream flexi-utf-16-le-output-stream) char sink) - (declare (optimize speed)) - (flet ((write-word (word) - (write-byte* (ldb (byte 8 0) word) sink) - (write-byte* (ldb (byte 8 8) word) sink))) - (declare (inline write-word) (dynamic-extent (function write-word))) - (let ((char-code (char-code char))) - (cond ((< char-code #x10000) - (write-word char-code)) - (t (decf char-code #x10000) - (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) - char) - -(defmethod char-to-octets ((stream flexi-utf-16-be-output-stream) char sink) - (declare (optimize speed)) - (flet ((write-word (word) - (write-byte* (ldb (byte 8 8) word) sink) - (write-byte* (ldb (byte 8 0) word) sink))) - (declare (inline write-word) (dynamic-extent (function write-word))) - (let ((char-code (char-code char))) - (cond ((< char-code #x10000) - (write-word char-code)) - (t (decf char-code #x10000) - (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) - char) - -(defmethod char-to-octets ((stream flexi-utf-32-le-output-stream) char sink) - (declare (optimize speed)) - (loop with char-code = (char-code char) - for position in '(0 8 16 24) do - (write-byte* (ldb (byte 8 position) char-code) sink)) - char) - -(defmethod char-to-octets ((stream flexi-utf-32-be-output-stream) char sink) - (declare (optimize speed)) - (loop with char-code = (char-code char) - for position in '(24 16 8 0) do - (write-byte* (ldb (byte 8 position) char-code) sink)) - char) - -(defmethod char-to-octets ((stream flexi-cr-mixin) char sink) - "The `base' method for all streams which need end-of-line -conversion. Uses CALL-NEXT-METHOD to do the actual work of sending -one or more characters to SINK." - (declare (optimize speed)) (with-accessors ((external-format flexi-stream-external-format)) stream - (case char - (#\Newline - (case (external-format-eol-style external-format) - (:cr (call-next-method stream #\Return sink)) - (:crlf (call-next-method stream #\Return sink) - (call-next-method stream #\Linefeed sink)))) - (otherwise (call-next-method))) - char)) + (char-to-octets external-format + char + (lambda (octet) + (write-byte* octet stream)) + stream)))
(defmethod stream-write-char :after ((stream flexi-output-stream) char) (declare (optimize speed)) @@ -297,8 +175,13 @@ :start start :end end :from-end t))) - (loop for index from start below end - do (char-to-octets stream (aref sequence index) buffer) + (loop with format = (flexi-stream-external-format stream) + for index from start below end + do (char-to-octets format + (aref sequence index) + (lambda (octet) + (vector-push octet buffer)) + stream) when (>= (fill-pointer buffer) +buffer-size+) do (write-sequence buffer (flexi-stream-stream stream)) (setf (fill-pointer buffer) 0)
Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.30 2007/10/11 20:23:53 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.31 2008/05/17 13:50:16 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions
Modified: branches/edi/specials.lisp ============================================================================== --- branches/edi/specials.lisp (original) +++ branches/edi/specials.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.25 2007/12/29 21:17:06 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.26 2008/05/17 13:50:16 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions
Modified: branches/edi/stream.lisp ============================================================================== --- branches/edi/stream.lisp (original) +++ branches/edi/stream.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.53 2007/12/29 22:26:04 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.57 2008/05/17 14:21:20 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -49,45 +49,6 @@ allow for multi-octet external formats. FLEXI-STREAM itself is a mixin and should not be instantiated."))
-(define-condition flexi-stream-error (stream-error) - () - (:documentation "Superclass for all errors related to -flexi streams.")) - -(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) - () - (:documentation "Like FLEXI-STREAM-ERROR but with formatting -capabilities.")) - -(define-condition flexi-stream-element-type-error (flexi-stream-error) - ((element-type :initarg :element-type - :reader flexi-stream-element-type-error-element-type)) - (:report (lambda (condition stream) - (format stream "Element type ~S not allowed." - (flexi-stream-element-type-error-element-type condition)))) - (:documentation "Errors of this type are signalled if the flexi -stream has a wrong element type.")) - -(define-condition flexi-stream-encoding-error (flexi-stream-simple-error) - () - (:documentation "Errors of this type are signalled if there is an -encoding problem.")) - -(define-condition flexi-stream-position-spec-error (flexi-stream-simple-error) - ((position-spec :initarg :position-spec - :reader flexi-stream-position-spec-error-position-spec)) - (:documentation "Errors of this type are signalled if an -erroneous position spec is used in conjunction with -FILE-POSITION.")) - -(defun signal-encoding-error (flexi-stream format-control &rest format-args) - "Convenience function similar to ERROR to signal conditions of type -FLEXI-STREAM-ENCODING-ERROR." - (error 'flexi-stream-encoding-error - :format-control format-control - :format-arguments format-args - :stream flexi-stream)) - (defun maybe-convert-external-format (external-format) "Given an external format designator (a keyword, a list, or an EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT @@ -110,9 +71,7 @@ (error 'flexi-stream-element-type-error :element-type element-type :stream flexi-stream)) - (setq external-format (maybe-convert-external-format external-format))) - ;; set actual class and maybe contents of 8-bit encoding slots - (set-class flexi-stream)) + (setq external-format (maybe-convert-external-format external-format))))
(defmethod (setf flexi-stream-external-format) :around (new-value (flexi-stream flexi-stream)) "Converts the new value to an EXTERNAL-FORMAT object if @@ -226,461 +185,6 @@ MAKE-INSTANCE to create a new FLEXI-IO-STREAM but use MAKE-FLEXI-STREAM instead."))
-(defclass flexi-cr-mixin () - () - (:documentation "A mixin for flexi streams which need -end-of-line conversion, i.e. for those where the end-of-line -designator is /not/ the single character #\Linefeed.")) - -(defclass flexi-8-bit-stream (flexi-stream) - ((encoding-hash :accessor flexi-stream-encoding-hash)) - (:documentation "The class for all flexi streams which use an 8-bit -encoding and thus need an additional slot for the encoding hash.")) - -(defclass flexi-8-bit-input-stream (flexi-input-stream flexi-8-bit-stream) - ((encoding-table :accessor flexi-stream-encoding-table)) - (:documentation "The class for all flexi input streams which use an -8-bit encoding and thus need an additional slot for the encoding -table.")) - -(defclass flexi-cr-8-bit-input-stream (flexi-cr-mixin flexi-8-bit-input-stream) - () - (:documentation "The class for all flexi input streams which -use an 8-bit encoding /and/ need end-of-line conversion.")) - -(defclass flexi-ascii-input-stream (flexi-8-bit-input-stream) - () - (:documentation "Special class for flexi input streams which -use the US-ASCCI encoding.")) - -(defclass flexi-cr-ascii-input-stream (flexi-cr-mixin flexi-ascii-input-stream) - () - (:documentation "Special class for flexi input streams which -use the US-ASCCI encoding /and/ need end-of-line conversion.")) - -(defclass flexi-latin-1-input-stream (flexi-8-bit-input-stream) - () - (:documentation "Special class for flexi input streams which -use the ISO-8859-1 encoding.")) - -(defclass flexi-cr-latin-1-input-stream (flexi-cr-mixin flexi-latin-1-input-stream) - () - (:documentation "Special class for flexi input streams which -use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-utf-32-le-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-32-le-input-stream (flexi-cr-mixin flexi-utf-32-le-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-32-be-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-32-be-input-stream (flexi-cr-mixin flexi-utf-32-be-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-le-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-16-le-input-stream (flexi-cr-mixin flexi-utf-16-le-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-16-be-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-16-be-input-stream (flexi-cr-mixin flexi-utf-16-be-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-8-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-8 encoding.")) - -(defclass flexi-cr-utf-8-input-stream (flexi-cr-mixin flexi-utf-8-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-8 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-8-bit-output-stream (flexi-output-stream flexi-8-bit-stream) - () - (:documentation "The class for all flexi output streams which use an -8-bit encoding.")) - -(defclass flexi-cr-8-bit-output-stream (flexi-cr-mixin flexi-8-bit-output-stream) - () - (:documentation "The class for all flexi output streams which -use an 8-bit encoding /and/ need end-of-line conversion.")) - -(defclass flexi-ascii-output-stream (flexi-8-bit-output-stream) - () - (:documentation "Special class for flexi output streams which -use the US-ASCCI encoding.")) - -(defclass flexi-cr-ascii-output-stream (flexi-cr-mixin flexi-ascii-output-stream) - () - (:documentation "Special class for flexi output streams which -use the US-ASCCI encoding /and/ need end-of-line conversion.")) - -(defclass flexi-latin-1-output-stream (flexi-8-bit-output-stream) - () - (:documentation "Special class for flexi output streams which -use the ISO-8859-1 encoding.")) - -(defclass flexi-cr-latin-1-output-stream (flexi-cr-mixin flexi-latin-1-output-stream) - () - (:documentation "Special class for flexi output streams which -use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-utf-32-le-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-utf-32-le-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-32-be-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-utf-32-be-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-le-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-utf-16-le-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-16-be-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-utf-16-be-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-8-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-8 encoding.")) - -(defclass flexi-cr-utf-8-output-stream (flexi-cr-mixin flexi-utf-8-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-8 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-8-bit-io-stream (flexi-8-bit-input-stream flexi-8-bit-output-stream flexi-io-stream) - () - (:documentation "The class for all flexi I/O streams which use an -8-bit encoding.")) - -(defclass flexi-cr-8-bit-io-stream (flexi-cr-mixin flexi-8-bit-io-stream) - () - (:documentation "The class for all flexi I/O streams which use -an 8-bit encoding /and/ need end-of-line conversion.")) - -(defclass flexi-ascii-io-stream (flexi-ascii-input-stream flexi-ascii-output-stream flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the US-ASCCI encoding.")) - -(defclass flexi-cr-ascii-io-stream (flexi-cr-mixin flexi-ascii-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the US-ASCCI encoding /and/ need end-of-line conversion.")) - -(defclass flexi-latin-1-io-stream (flexi-latin-1-input-stream flexi-latin-1-output-stream flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the ISO-8859-1 encoding.")) - -(defclass flexi-cr-latin-1-io-stream (flexi-cr-mixin flexi-latin-1-io-stream) - () - (:documentation "Special class for flexi input streams which -use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-utf-32-le-io-stream (flexi-utf-32-le-input-stream - flexi-utf-32-le-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-32-le-io-stream (flexi-cr-mixin flexi-utf-32-le-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with little-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-32-be-io-stream (flexi-utf-32-be-input-stream - flexi-utf-32-be-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-32-be-io-stream (flexi-cr-mixin flexi-utf-32-be-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-le-io-stream (flexi-utf-16-le-input-stream - flexi-utf-16-le-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-16-le-io-stream (flexi-cr-mixin flexi-utf-16-le-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with little-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-be-io-stream (flexi-utf-16-be-input-stream - flexi-utf-16-be-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-16-be-io-stream (flexi-cr-mixin flexi-utf-16-be-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-8-io-stream (flexi-utf-8-input-stream flexi-utf-8-output-stream flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-8 encoding.")) - -(defclass flexi-cr-utf-8-io-stream (flexi-cr-mixin flexi-utf-8-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-8 encoding /and/ need end-of-line conversion.")) - -(defmethod (setf flexi-stream-external-format) :after (new-value (stream flexi-stream)) - "After we've changed the external format of a flexi stream, we -might have to change its actual class and maybe also the contents -of its 8-bit encoding slots." - (declare (ignore new-value) - (optimize speed)) - ;; note that it's potentially dangerous to call SET-CLASS from - ;; within a method, see for example this thread: - ;; http://thread.gmane.org/gmane.lisp.lispworks.general/6269 - (set-class stream)) - -(defmethod set-class ((stream flexi-input-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-input-stream - 'flexi-ascii-input-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-input-stream - 'flexi-latin-1-input-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-input-stream - 'flexi-8-bit-input-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-input-stream - 'flexi-utf-8-input-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-input-stream - 'flexi-cr-utf-16-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-input-stream - 'flexi-utf-16-be-input-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-input-stream - 'flexi-cr-utf-32-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-input-stream - 'flexi-utf-32-be-input-stream)))))))))) - -(defmethod set-class ((stream flexi-output-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-output-stream - 'flexi-ascii-output-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-output-stream - 'flexi-latin-1-output-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-output-stream - 'flexi-8-bit-output-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-output-stream - 'flexi-utf-8-output-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-output-stream - 'flexi-cr-utf-16-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-output-stream - 'flexi-utf-16-be-output-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-output-stream - 'flexi-cr-utf-32-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-output-stream - 'flexi-utf-32-be-output-stream)))))))))) - -(defmethod set-class ((stream flexi-io-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-io-stream - 'flexi-ascii-io-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-io-stream - 'flexi-latin-1-io-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-io-stream - 'flexi-8-bit-io-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-io-stream - 'flexi-utf-8-io-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-io-stream - 'flexi-cr-utf-16-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-io-stream - 'flexi-utf-16-be-io-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-io-stream - 'flexi-cr-utf-32-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-io-stream - 'flexi-utf-32-be-io-stream)))))))))) - -(defmethod set-class :after ((stream flexi-stream)) - "After we've changed the actual class of a flexi stream we may -have to set the contents of the 8-bit enconding slots as well." - (declare (optimize speed)) - (set-encoding-hash stream) - (set-encoding-table stream)) - -(defgeneric set-encoding-hash (stream) - (:method (stream)) - (:documentation "Sets the value of the ENCODING-HASH slot of -STREAM if necessary. The default method does nothing.")) - -(defgeneric set-encoding-table (stream) - (:method (stream)) - (:documentation "Sets the value of the ENCODING-TABLE slot of -STREAM if necessary. The default method does nothing.")) - -(defmethod set-encoding-hash ((stream flexi-8-bit-stream)) - "Sets the value of the ENCODING-HASH slot of STREAM depending -on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format) - (encoding-hash flexi-stream-encoding-hash)) - stream - (let ((external-format-name (external-format-name external-format))) - (setq encoding-hash - (cond ((ascii-name-p external-format-name) +ascii-hash+) - ((koi8-r-name-p external-format-name) +koi8-r-hash+) - ((iso-8859-name-p external-format-name) - (cdr (assoc external-format-name +iso-8859-hashes+ :test #'eq))) - ((code-page-name-p external-format-name) - (cdr (assoc (external-format-id external-format) +code-page-hashes+)))))))) - -(defmethod set-encoding-table ((stream flexi-8-bit-input-stream)) - "Sets the value of the ENCODING-TABLE slot of STREAM depending -on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format) - (encoding-table flexi-stream-encoding-table)) - stream - (let ((external-format-name (external-format-name external-format))) - (setq encoding-table - (cond ((ascii-name-p external-format-name) +ascii-table+) - ((koi8-r-name-p external-format-name) +koi8-r-table+) - ((iso-8859-name-p external-format-name) - (cdr (assoc external-format-name +iso-8859-tables+ :test #'eq))) - ((code-page-name-p external-format-name) - (cdr (assoc (external-format-id external-format) +code-page-tables+)))))))) - #+:cmu (defmethod input-stream-p ((stream flexi-io-stream)) "Explicitly states whether this is an input stream."
Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.4 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.5 2008/05/17 13:50:16 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions
Modified: branches/edi/test/packages.lisp ============================================================================== --- branches/edi/test/packages.lisp (original) +++ branches/edi/test/packages.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/packages.lisp,v 1.4 2007/01/01 23:47:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/packages.lisp,v 1.6 2008/05/17 16:38:26 edi Exp $
-;;; Copyright (c) 2006-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -30,4 +30,5 @@ (in-package :cl-user)
(defpackage :flexi-streams-test - (:use :cl :flexi-streams)) + (:use :cl :flexi-streams) + (:export :run-tests))
Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.17 2007/12/29 22:58:44 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.20 2008/05/17 13:50:18 edi Exp $
-;;; Copyright (c) 2006-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -89,13 +89,17 @@ (append args `(:eol-style ,eol-style :little-endian ,little-endian))))))))
-(defun create-test-combinations (file-name symbols) - "For a name suffix FILE-NAME and a list of symbols SYMBOLS -denoting different encodings of the corresponding file returns a -list of lists which can be used as arglists for COMPARE-FILES." +(defun create-test-combinations (file-name symbols &optional simplep) + "For a name suffix FILE-NAME and a list of symbols SYMBOLS denoting +different encodings of the corresponding file returns a list of lists +which can be used as arglists for COMPARE-FILES. If SIMPLEP is true, +a list which can be used for the string tests below is returned." (let ((file-variants (loop for symbol in symbols nconc (create-file-variants file-name symbol)))) (loop for (name-in . external-format-in) in file-variants + when simplep + collect (list name-in external-format-in) + else nconc (loop for (name-out . external-format-out) in file-variants collect (list name-in external-format-in name-out external-format-out)))))
@@ -200,6 +204,27 @@ #+:lispworks (terpri *error-output*)))))
+(defun file-as-octet-vector (pathspec) + "Returns the contents of the file denoted by PATHSPEC as a vector of +octets." + (with-open-file (in pathspec :element-type 'octet) + (let ((vector (make-array (file-length in) :element-type 'octet))) + (read-sequence vector in) + vector))) + +(defun file-as-string (pathspec external-format) + "Reads the contents of the file denoted by PATHSPEC using the +external format EXTERNAL-FORMAT and returns the result as a string." + (with-open-file (in pathspec :element-type 'octet) + (let* ((number-of-octets (file-length in)) + (in (make-flexi-stream in :external-format external-format)) + (string (make-array number-of-octets + :element-type #+:lispworks 'lw:simple-char + #-:lispworks 'character + :fill-pointer t))) + (setf (fill-pointer string) (read-sequence string in)) + string))) + (defmacro with-test ((test-description) &body body) "Defines a test. Two utilities are available inside of the body of the maco: The function FAIL, and the macro CHECK. FAIL, the lowest @@ -231,6 +256,21 @@ (terpri *error-output*)) ,successp))))
+(defun string-test (pathspec external-format) + "Tests whether conversion from strings to octets and vice versa +using the external format EXTERNAL-FORMAT works as expected, using the +contents of the file denoted by PATHSPEC as test data and assuming +that the stream conversion functions work." + (let* ((full-path (merge-pathnames pathspec *this-file*)) + (octets-vector (file-as-octet-vector full-path)) + (octets-list (coerce octets-vector 'list)) + (string (file-as-string full-path external-format))) + (with-test ((format nil "String tests with format ~S." + (flex::normalize-external-format external-format))) + (check (string= (octets-to-string octets-vector :external-format external-format) string)) + (check (string= (octets-to-string octets-list :external-format external-format) string)) + (check (equalp (string-to-octets string :external-format external-format) octets-vector))))) + (defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE restart each time a FLEXI-STREAM-ENCODING-ERROR is signalled. Signals @@ -262,6 +302,9 @@ (defun encoding-error-handling-test () "Tests several possible encoding errors and how they are handled." (with-test ("Handling of encoding errors.") + ;; handling of EOF in the middle of CRLF + (check (string= #.(string #\Return) + (read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf)))) (let ((*substitution-char* #?)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "a??" (read-flexi-line `(,(char-code #\a) 128 200) :ascii))) @@ -326,13 +369,18 @@ CREATE-TEST-COMBINATIONS, runs test for handling of encoding errors, and shows simple statistics at the end." (let* ((*test-success-counter* 0) - (args-list (loop for (file-name symbols) in *test-files* - nconc (create-test-combinations file-name symbols))) - (no-tests (* 4 (length args-list)))) + (compare-files-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols))) + (no-tests (* 4 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) - (dolist (args args-list) - (apply #'compare-files args)) + (dolist (args compare-files-args-list) + (apply 'compare-files args)) + (let ((string-test-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols t)))) + (incf no-tests (length string-test-args-list)) + (dolist (args string-test-args-list) + (apply 'string-test args))) (incf no-tests) (encoding-error-handling-test) (incf no-tests)
Modified: branches/edi/util.lisp ============================================================================== --- branches/edi/util.lisp (original) +++ branches/edi/util.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.13 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.14 2008/05/17 13:50:16 edi Exp $
-;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions