flexi-streams-cvs
  Threads by month 
                
            - ----- 2025 -----
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2024 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2023 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2022 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2021 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2020 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2019 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2018 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2017 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2016 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2015 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2014 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2013 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2012 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2011 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2010 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2009 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 - April
 - March
 - February
 - January
 - ----- 2008 -----
 - December
 - November
 - October
 - September
 - August
 - July
 - June
 - May
 
May 2008
- 2 participants
 - 61 discussions
 
25 May '08
                    
                        Author: eweitz
Date: Sun May 25 19:43:22 2008
New Revision: 61
Modified:
   branches/edi/CHANGELOG
   branches/edi/conditions.lisp
   branches/edi/decode.lisp
   branches/edi/doc/index.html
   branches/edi/flexi-streams.asd
   branches/edi/length.lisp
   branches/edi/packages.lisp
   branches/edi/strings.lisp
   branches/edi/test/test.lisp
Log:
Ready for release
Modified: branches/edi/CHANGELOG
==============================================================================
--- branches/edi/CHANGELOG	(original)
+++ branches/edi/CHANGELOG	Sun May 25 19:43:22 2008
@@ -1,3 +1,10 @@
+Version 1.0.0
+2008-05-26
+More redesign for the sake of performance
+More checks for invalid data
+More tests
+Exported functions for length computation
+
 Version 0.15.3
 2008-05-23
 Avoid CHANGE-CLASS on LispWorks if possible
Modified: branches/edi/conditions.lisp
==============================================================================
--- branches/edi/conditions.lisp	(original)
+++ branches/edi/conditions.lisp	Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.8 2008/05/25 03:07:58 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.9 2008/05/25 22:23:58 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -93,21 +93,11 @@
   ()
   (:documentation "Superclass for all errors related to external
 formats."))
-
-(define-condition external-format-warning (external-format-condition warning)
-  ()
-  (:documentation "Superclass for all warnings related to external
-formats."))
   
 (define-condition external-format-encoding-error (external-format-error)
   ()
   (:documentation "Errors of this type are signalled if there is an
 encoding problem."))
-  
-(define-condition external-format-encoding-warning (external-format-warning)
-  ()
-  (:documentation "Warnings of this type are signalled if there is an
-encoding problem."))
 
 (defun signal-encoding-error (external-format format-control &rest format-args)
   "Convenience function similar to ERROR to signal conditions of type
@@ -116,11 +106,3 @@
          :format-control format-control
          :format-arguments format-args
          :external-format external-format))
-
-(defun signal-encoding-warning (external-format format-control &rest format-args)
-  "Convenience function similar to WARN to signal conditions of type
-EXTERNAL-FORMAT-ENCODING-WARNING."
-  (warn 'external-format-encoding-warning
-        :format-control format-control
-        :format-arguments format-args
-        :external-format external-format))
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.26 2008/05/25 20:44:03 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.29 2008/05/25 23:19:19 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -202,7 +202,7 @@
          (declare #.*standard-optimize-settings*)
          (declare (fixnum start end))
          (let* ((i start)
-                (string-length (compute-number-of-chars format sequence start end nil))
+                (string-length (compute-number-of-chars format sequence start end))
                 (string (make-array string-length :element-type 'char*)))
            (declare (fixnum i string-length))
            (loop for j of-type fixnum from 0 below string-length
@@ -223,39 +223,46 @@
 encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
 similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
 BODY is a code template for the code to read octets and return one
-character.  BODY must contain a symbol OCTET-GETTER representing the
-form which is used to obtain the next octet."
-  `(progn
-     (defmethod octets-to-char-code ((format ,lf-format-class) reader)
-       (declare #.*fixnum-optimize-settings*)
-       (declare (function reader))
-       (symbol-macrolet ((octet-getter (funcall reader)))
-         ,@(sublis '((char-decoder . octets-to-char-code))
-                   body)))
-     (define-sequence-readers (,lf-format-class) ,@body)
-     (define-sequence-readers (,cr-format-class)
-       ,(with-unique-names (char-code)
-          `(let ((,char-code (progn ,@body)))
-             (case ,char-code
-               (#.+cr+ #.(char-code #\Newline))
-               (otherwise ,char-code)))))
-     (define-sequence-readers  (,crlf-format-class)
-       ,(with-unique-names (char-code next-char-code get-char-code)
-          `(flet ((,get-char-code () ,@body))
-             (let ((,char-code (,get-char-code)))
+character code.  BODY must contain a symbol OCTET-GETTER representing
+the form which is used to obtain the next octet."
+  (let* ((body (with-unique-names (char-code)
+                 `((let ((,char-code (progn ,@body)))
+                     (when (and ,char-code
+                                (or (<= #xd8 (logand* #x00ff (ash* ,char-code -8)) #xdf)
+                                    (> ,char-code #x10ffff)))
+                       (recover-from-encoding-error format "Illegal code point ~A \(#x~:*~X)." ,char-code))
+                     ,char-code)))))
+    `(progn
+       (defmethod octets-to-char-code ((format ,lf-format-class) reader)
+         (declare #.*fixnum-optimize-settings*)
+         (declare (function reader))
+         (symbol-macrolet ((octet-getter (funcall reader)))
+           ,@(sublis '((char-decoder . octets-to-char-code))
+                     body)))
+       (define-sequence-readers (,lf-format-class) ,@body)
+       (define-sequence-readers (,cr-format-class)
+         ,(with-unique-names (char-code)
+            `(let ((,char-code (progn ,@body)))
                (case ,char-code
-                 (#.+cr+
-                  (let ((,next-char-code (,get-char-code)))
-                    (case ,next-char-code
-                      (#.+lf+ #.(char-code #\Newline))
-                      ;; we saw a CR but no LF afterwards, but then the data
-                      ;; ended, so we just return #\Return
-                      ((nil) +cr+)
-                      ;; if the character we peeked at wasn't a
-                      ;; linefeed character we unread its constituents
-                      (otherwise (unget (code-char ,next-char-code))
-                                 ,char-code))))
-                 (otherwise ,char-code))))))))
+                 (#.+cr+ #.(char-code #\Newline))
+                 (otherwise ,char-code)))))
+       (define-sequence-readers  (,crlf-format-class)
+         ,(with-unique-names (char-code next-char-code get-char-code)
+            `(flet ((,get-char-code () ,@body))
+               (let ((,char-code (,get-char-code)))
+                 (case ,char-code
+                   (#.+cr+
+                    (let ((,next-char-code (,get-char-code)))
+                      (case ,next-char-code
+                        (#.+lf+ #.(char-code #\Newline))
+                        ;; we saw a CR but no LF afterwards, but then the data
+                        ;; ended, so we just return #\Return
+                        ((nil) +cr+)
+                        ;; if the character we peeked at wasn't a
+                        ;; linefeed character we unread its constituents
+                        (otherwise (unget (code-char ,next-char-code))
+                                   ,char-code))))
+                   (otherwise ,char-code)))))))))
 
 (define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
   octet-getter)
@@ -296,25 +303,28 @@
         (multiple-value-bind (start count)
             (cond ((not (logbitp 7 octet))
                    (values octet 0))
-                  ((= #b11000000 (logand octet #b11100000))
-                   (values (logand octet #b00011111) 1))
-                  ((= #b11100000 (logand octet #b11110000))
-                   (values (logand octet #b00001111) 2))
-                  ((= #b11110000 (logand octet #b11111000))
-                   (values (logand octet #b00000111) 3))
+                  ((= #b11000000 (logand* octet #b11100000))
+                   (when (= #b11000000 (logand* octet #b11111110))
+                     (return-from char-decoder
+                       (recover-from-encoding-error format
+                                                    "Illegal value #x~X leads to `overlong' UTF-8 sequence."
+                                                    octet)))
+                   (values (logand* octet #b00011111) 1))
+                  ((= #b11100000 (logand* octet #b11110000))
+                   (values (logand* octet #b00001111) 2))
+                  ((= #b11110000 (logand* octet #b11111000))
+                   (values (logand* octet #b00000111) 3))
                   (t (return-from char-decoder
                        (recover-from-encoding-error format
                                                     "Unexpected value #x~X at start of UTF-8 sequence."
                                                     octet))))
           (declare (fixnum count))
-          ;; note that we currently don't check for "overlong"
-          ;; sequences or other illegal values
           (loop for result of-type code-point
-                = start then (+ (ash result 6)
-                                (logand octet #b111111))
+                = start then (+ (ash* result 6)
+                                (logand* octet #b111111))
                 repeat count
                 for octet of-type octet = (read-next-byte)
-                unless (= #b10000000 (logand octet #b11000000))
+                unless (= #b10000000 (logand* octet #b11000000))
                 do (return-from char-decoder
                      (recover-from-encoding-error format
                                                   "Unexpected value #x~X in UTF-8 sequence." octet))
@@ -334,7 +344,7 @@
                     (setq first-octet-seen t))))
       (flet ((read-next-word ()
                (+ (the octet (read-next-byte))
-                  (ash (the octet (read-next-byte)) 8))))
+                  (ash* (the octet (read-next-byte)) 8))))
         (declare (inline read-next-word))
         (let ((word (read-next-word)))
           (declare (type (unsigned-byte 16) word))
@@ -346,8 +356,8 @@
                        (recover-from-encoding-error format
                                                     "Unexpected UTF-16 word #x~X following #x~X."
                                                     next-word word)))
-                   (+ (ash (logand #b1111111111 word) 10)
-                      (logand #b1111111111 next-word)
+                   (+ (ash* (logand* #b1111111111 word) 10)
+                      (logand* #b1111111111 next-word)
                       #x10000)))
                 (t word)))))))
 
@@ -364,7 +374,7 @@
                                 (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (flet ((read-next-word ()
-               (+ (ash (the octet (read-next-byte)) 8)
+               (+ (ash* (the octet (read-next-byte)) 8)
                   (the octet (read-next-byte)))))
         (declare (inline read-next-word))
         (let ((word (read-next-word)))
@@ -377,8 +387,8 @@
                        (recover-from-encoding-error format
                                                     "Unexpected UTF-16 word #x~X following #x~X."
                                                     next-word word)))
-                   (+ (ash (logand #b1111111111 word) 10)
-                      (logand #b1111111111 next-word)
+                   (+ (ash* (logand* #b1111111111 word) 10)
+                      (logand* #b1111111111 next-word)
                       #x10000)))
                 (t word)))))))
 
@@ -396,7 +406,7 @@
                     (setq first-octet-seen t))))
       (loop for count of-type fixnum from 0 to 24 by 8
             for octet of-type octet = (read-next-byte)
-            sum (ash octet count)))))
+            sum (ash* octet count)))))
 
 (define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
   (let (first-octet-seen)
@@ -412,7 +422,7 @@
                     (setq first-octet-seen t))))
       (loop for count of-type fixnum from 24 downto 0 by 8
             for octet of-type octet = (read-next-byte)
-            sum (ash octet count)))))
+            sum (ash* octet count)))))
 
 (defmethod octets-to-char-code ((format flexi-cr-mixin) reader)
   (declare #.*fixnum-optimize-settings*)
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html	(original)
+++ branches/edi/doc/index.html	Sun May 25 19:43:22 2008
@@ -72,7 +72,6 @@
       <li><a href="#external-format-condition"><code>external-format-condition</code></a>
       <li><a href="#external-format-condition-external-format"><code>external-format-condition-external-format</code></a>
       <li><a href="#external-format-error"><code>external-format-error</code></a>
-      <li><a href="#external-format-warning"><code>external-format-warning</code></a>
       <li><a href="#external-format-encoding-error"><code>external-format-encoding-error</code></a>
       <li><a href="#*substitution-char*"><code>*substitution-char*</code></a>
       </ol>
@@ -229,7 +228,7 @@
 <p>
 FLEXI-STREAMS together with this documentation can be downloaded from <a
 href="http://weitz.de/files/flexi-streams.tar.gz">http://weitz.de/files/flexi-streams.tar.gz</a>. The
-current version is 0.15.3.
+current version is 1.0.0.
 <p>
 Before you install FLEXI-STREAMS you first need to
 install the <a
@@ -548,14 +547,6 @@
 </blockquote>
 
 <p><br>[Condition]
-<br><a class=none name="external-format-warning"><b>external-format-warning</b></a>
-
-<blockquote><br>
-All warnings related to <a href="#external-formats">external formats</a> are of this type.
-This is a subtype of <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>.
-</blockquote>
-
-<p><br>[Condition]
 <br><a class=none name="external-format-error"><b>external-format-error</b></a>
 
 <blockquote><br>
@@ -1063,7 +1054,7 @@
 The defaults for
 <code><i>start</i></code> and <code><i>end</i></code>
 are <code>0</code> and the length of the sequence.  The default
-for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+for <code><i>external-format</i></code> is <code>:LATIN1</code>.  Note that this function doesn't check for the validity of the data in <code><i>sequence</i></code>.
 <p>
 This function is optimized for the case
 of <code><i>sequence</i></code> being
@@ -1110,7 +1101,7 @@
 his work on making FLEXI-STREAMS faster.
 
 <p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.119 2008/05/25 23:42:30 edi Exp $
 <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
 
 </body>
Modified: branches/edi/flexi-streams.asd
==============================================================================
--- branches/edi/flexi-streams.asd	(original)
+++ branches/edi/flexi-streams.asd	Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.71 2008/05/25 23:42:28 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -35,7 +35,7 @@
 (in-package :flexi-streams-system)
 
 (defsystem :flexi-streams
-  :version "0.15.3"
+  :version "1.0.0"
   :serial t
   :components ((:file "packages")
                (:file "mapping")
Modified: branches/edi/length.lisp
==============================================================================
--- branches/edi/length.lisp	(original)
+++ branches/edi/length.lisp	Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.4 2008/05/25 22:23:58 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -72,51 +72,50 @@
   ;; the estimate unexact
   (* 1.02d0 (call-next-method)))
 
-(defgeneric check-end (format start end i warnp)
+(defgeneric check-end (format start end i)
   (declare #.*fixnum-optimize-settings*)
   (:documentation "Helper function used below to determine if we tried
 to read past the end of the sequence.")
-  (:method (format start end i warnp)
+  (:method (format start end i)
    (declare #.*fixnum-optimize-settings*)
    (declare (fixnum start end i))
-   (when (and warnp (> i end))
-     (signal-encoding-warning format "These ~A octet~:P can't be ~
+   (when (> i end)
+     (signal-encoding-error format "These ~A octet~:P can't be ~
 decoded using ~A as the sequence is too short.  ~A octet~:P missing ~
 at then end."
-                              (- end start)
-                              (external-format-name format)
-                              (- i end))))
-  (:method ((format flexi-utf-16-format) start end i warnp)
+                            (- end start)
+                            (external-format-name format)
+                            (- i end))))
+  (:method ((format flexi-utf-16-format) start end i)
    (declare #.*fixnum-optimize-settings*)
    (declare (fixnum start end i))
-   (declare (ignore i warnp))
+   (declare (ignore i))
    ;; don't warn twice
    (when (evenp (- end start))
      (call-next-method))))
 
-(defgeneric compute-number-of-chars (format sequence start end warnp)
+(defgeneric compute-number-of-chars (format sequence start end)
   (declare #.*standard-optimize-settings*)
   (:documentation "Computes the exact number of characters required to
 decode the sequence of octets in SEQUENCE from START to END using the
-external format FORMAT.  If WARNP is NIL, warnings will be muffled."))
+external format FORMAT."))
 
-(defmethod compute-number-of-chars :around (format (list list) start end warnp)
+(defmethod compute-number-of-chars :around (format (list list) start end)
   (declare #.*standard-optimize-settings*)
-  (call-next-method format (coerce list 'vector) start end warnp))
+  (call-next-method format (coerce list 'vector) start end))
 
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
-  (declare (ignore sequence warnp))
+  (declare (ignore sequence))
   (- end start))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end)
   ;; this method only applies to the 8-bit formats as all other
   ;; formats with CRLF line endings have their own specialized methods
   ;; below
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end) (vector sequence))
-  (declare (ignore warnp))
   (let ((i start)
         (length (- end start)))
     (declare (fixnum i length))
@@ -130,7 +129,7 @@
        (decf length)))
     length))
 
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
@@ -140,17 +139,18 @@
      (when (>= i end)
        (return))
      (let* ((octet (aref sequence i))
+            ;; note that there are no validity checks here
             (length (cond ((not (logbitp 7 octet)) 1)
-                          ((= #b11000000 (logand octet #b11100000)) 2)
-                          ((= #b11100000 (logand octet #b11110000)) 3)
+                          ((= #b11000000 (logand* octet #b11100000)) 2)
+                          ((= #b11100000 (logand* octet #b11110000)) 3)
                           (t 4))))
        (declare (fixnum length) (type octet octet))
        (incf sum)
        (incf i length)))
-    (check-end format start end i warnp)
+    (check-end format start end i)
     sum))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
@@ -161,28 +161,29 @@
      (when (>= i end)
        (return))
      (let* ((octet (aref sequence i))
+            ;; note that there are no validity checks here
             (length (cond ((not (logbitp 7 octet)) 1)
-                          ((= #b11000000 (logand octet #b11100000)) 2)
-                          ((= #b11100000 (logand octet #b11110000)) 3)
+                          ((= #b11000000 (logand* octet #b11100000)) 2)
+                          ((= #b11100000 (logand* octet #b11110000)) 3)
                           (t 4))))
        (declare (fixnum length) (type octet octet))
        (unless (and (= octet +lf+) (= last-octet +cr+))
          (incf sum))
        (incf i length)
        (setq last-octet octet)))
-    (check-end format start end i warnp)
+    (check-end format start end i)
     sum))
 
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end) (vector sequence))
   (declare (ignore sequence))
-  (when (and warnp (oddp (- end start)))
-    (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+  (when (oddp (- end start))
+    (signal-encoding-error format "~A octet~:P cannot be decoded ~
 using UTF-16 as ~:*~A is not even."
-                             (- end start))))  
+                           (- end start))))
   
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((sum 0)
@@ -198,10 +199,10 @@
        (declare (fixnum length) (type octet high-octet))
        (incf sum)
        (incf i length)))
-    (check-end format start (+ end 2) i warnp)
+    (check-end format start (+ end 2) i)
     sum))
 
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
@@ -217,10 +218,10 @@
        (declare (fixnum length) (type octet high-octet))
        (incf sum)
        (incf i length)))
-    (check-end format start (+ end 2) i warnp)
+    (check-end format start (+ end 2) i)
     sum))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
@@ -243,10 +244,10 @@
                           (aref sequence i)
                           0))
        (incf i length)))
-    (check-end format start (+ end 2) i warnp)
+    (check-end format start (+ end 2) i)
     sum))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
@@ -269,29 +270,28 @@
                           (aref sequence (1+ i))
                           0))
        (incf i length)))
-    (check-end format start (+ end 2) i warnp)
+    (check-end format start (+ end 2) i)
     sum))
 
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (declare (ignore sequence))
   (let ((length (- end start)))
-    (when (and warnp (plusp (mod length 4)))
-      (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+    (when (plusp (mod length 4))
+      (signal-encoding-error format "~A octet~:P cannot be decoded ~
 using UTF-32 as ~:*~A is not a multiple-value of four."
-                               length))))
+                             length))))
 
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
-  (declare (ignore sequence warnp))
+  (declare (ignore sequence))
   (ceiling (- end start) 4))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end) (vector sequence))
-  (declare (ignore warnp))
   (let ((i start)
         (length (ceiling (- end start) 4)))
     (decf end 8)
@@ -306,10 +306,9 @@
            (t (incf i 4))))
     length))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end) (vector sequence))
-  (declare (ignore warnp))
   (let ((i start)
         (length (ceiling (- end start) 4)))
     (decf end 8)
Modified: branches/edi/packages.lisp
==============================================================================
--- branches/edi/packages.lisp	(original)
+++ branches/edi/packages.lisp	Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.37 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.38 2008/05/25 22:23:58 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -46,12 +46,10 @@
            :external-format-eol-style
            :external-format-error
            :external-format-encoding-error
-           :external-format-encoding-warning
            :external-format-equal
            :external-format-id
            :external-format-little-endian
            :external-format-name
-           :external-format-warning
            :flexi-input-stream
            :flexi-output-stream
            :flexi-io-stream
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp	(original)
+++ branches/edi/strings.lisp	Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.32 2008/05/25 23:09:13 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -72,11 +72,12 @@
 (defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence)))
   "Kind of the inverse of OCTET-LENGTH.  Returns the length of the
 subsequence \(of octets) of SEQUENCE from START to END in characters
-if decoded using the external format EXTERNAL-FORMAT.
+if decoded using the external format EXTERNAL-FORMAT.  Note that this
+function doesn't check for the validity of the data in SEQUENCE.
 
 This function is optimized for the case of SEQUENCE being a vector.
 Don't use lists if you're in a hurry."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
   (setq external-format (maybe-convert-external-format external-format))
-  (compute-number-of-chars external-format sequence start end t))
+  (compute-number-of-chars external-format sequence start end))
Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp	(original)
+++ branches/edi/test/test.lisp	Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.33 2008/05/25 03:08:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.35 2008/05/25 23:10:47 edi Exp $
 
 ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -29,6 +29,48 @@
 
 (in-package :flexi-streams-test)
 
+(defmacro with-test ((test-description) &body body)
+  "Defines a test.  Two utilities are available inside of the body of
+the maco: The function FAIL, and the macro CHECK.  FAIL, the lowest
+level utility, marks the test defined by WITH-TEST as failed.  CHECK
+checks whether its argument is true, otherwise it calls FAIL. If
+during evaluation of the specified expression any condition is
+signalled, this is also considered a failure.
+
+WITH-TEST prints reports while the tests run.  It also increments
+*TEST-SUCCESS-COUNT* if a test completes successfully."
+  (flex::with-unique-names (successp)
+    `(let ((,successp t))
+       (flet ((fail (format-str &rest format-args)
+                (setf ,successp nil)
+                (apply #'format *error-output* format-str format-args)))
+         (macrolet ((check (expression)
+                      `(handler-case
+                           (unless ,expression
+                             (fail "Expression ~S failed.~%" ',expression))
+                         (error (c)
+                           (fail "Expression ~S failed signalling error of type ~A: ~A.~%" 
+                                 ',expression (type-of c) c))))
+                    (with-expected-error ((condition-type) &body body)
+                      `(handler-case (progn ,@body)
+                         (,condition-type () t)
+                         (:no-error (&rest args)
+                           (declare (ignore args))                           
+                           (fail "Expected condition ~S not signalled~%"
+                                 ',condition-type)))))
+           (format *error-output* "Test ~S~%" ,test-description)
+           ,@body
+           (if ,successp
+             (incf *test-success-counter*)
+             (format *error-output* "    Test failed!!!~%"))
+           (terpri *error-output*)
+           (terpri *error-output*))
+         ,successp))))
+
+;; LW can't indent this correctly because it's in a MACROLET
+#+:lispworks
+(editor:setup-indent "with-expected-error" 1 2 4)
+
 (defconstant +buffer-size+ 8192
   "Size of buffers for COPY-STREAM* below.")
 
@@ -245,37 +287,6 @@
       (setf (fill-pointer string) (read-sequence string in))
       string)))
 
-(defmacro with-test ((test-description) &body body)
-  "Defines a test.  Two utilities are available inside of the body of
-the maco: The function FAIL, and the macro CHECK.  FAIL, the lowest
-level utility, marks the test defined by WITH-TEST as failed.  CHECK
-checks whether its argument is true, otherwise it calls FAIL. If
-during evaluation of the specified expression any condition is
-signalled, this is also considered a failure.
-
-WITH-TEST prints reports while the tests run.  It also increments
-*TEST-SUCCESS-COUNT* if a test completes successfully."
-  (flex::with-unique-names (successp)
-    `(let ((,successp t))
-       (flet ((fail (format-str &rest format-args)
-                (setf ,successp nil)
-                (apply #'format *error-output* format-str format-args)))
-         (macrolet ((check (expression)
-                      `(handler-case
-                           (unless ,expression
-                             (fail "Expression ~S failed.~%" ',expression))
-                         (error (c)
-                           (fail "Expression ~S failed signalling error of type ~A: ~A.~%" 
-                                 ',expression (type-of c) c)))))
-           (format *error-output* "Test ~S~%" ,test-description)
-           ,@body
-           (if ,successp
-             (incf *test-success-counter*)
-             (format *error-output* "    Test failed!!!~%"))
-           (terpri *error-output*)
-           (terpri *error-output*))
-         ,successp))))
-
 (defun old-string-to-octets (string &key
                                     (external-format (make-external-format :latin1))
                                     (start 0) end)
@@ -460,7 +471,51 @@
 
 (defun error-handling-test ()
   "Tests several possible errors and how they are handled."
-  (with-test ("Handling of errors.")
+  (with-test ("Illegal values.")
+    (macrolet ((want-encoding-error (input format)
+                 `(with-expected-error (external-format-encoding-error)
+                    (read-flexi-line* ,input ,format))))
+      ;; "overlong"
+      (want-encoding-error #(#b11000000) :utf-8)
+      (want-encoding-error #(#b11000001) :utf-8)
+      ;; examples of invalid lead octets
+      (want-encoding-error #(#b11111000) :utf-8)
+      (want-encoding-error #(#b11111001) :utf-8)
+      (want-encoding-error #(#b11111100) :utf-8)
+      (want-encoding-error #(#b11111101) :utf-8)
+      (want-encoding-error #(#b11111110) :utf-8)
+      (want-encoding-error #(#b11111111) :utf-8)
+      ;; illegal code points
+      (want-encoding-error #(#x00 #x00 #x11 #x00) :utf-32le)
+      (want-encoding-error #(#x00 #xd8) :utf-16le)
+      (want-encoding-error #(#xff #xdf) :utf-16le)))
+  (with-test ("Illegal lengths.")
+    (macrolet ((want-encoding-error (input format)
+                 `(with-expected-error (external-format-encoding-error)
+                    (read-flexi-line* ,input ,format))))                 
+      ;; UTF-8 sequences which are too short
+      (want-encoding-error #(#xe4 #xf6 #xfc) :utf8)
+      (want-encoding-error #(#xc0) :utf8)
+      (want-encoding-error #(#xe0 #xff) :utf8)
+      (want-encoding-error #(#xf0 #xff #xff) :utf8)
+      ;; UTF-16 wants an even number of octets
+      (want-encoding-error #(#x01) :utf-16le)
+      (want-encoding-error #(#x01 #x01 #x01) :utf-16le)
+      (want-encoding-error #(#x01) :utf-16be)
+      (want-encoding-error #(#x01 #x01 #x01) :utf-16be)
+      ;; another word should follow but it doesn't
+      (want-encoding-error #(#x01 #xd8) :utf-16le)
+      (want-encoding-error #(#xd8 #x01) :utf-16be)
+      ;; UTF-32 always wants four octets
+      (want-encoding-error #(#x01) :utf-32le)
+      (want-encoding-error #(#x01 #x01) :utf-32le)
+      (want-encoding-error #(#x01 #x01 #x01) :utf-32le)
+      (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32le)
+      (want-encoding-error #(#x01) :utf-32be)
+      (want-encoding-error #(#x01 #x01) :utf-32be)
+      (want-encoding-error #(#x01 #x01 #x01) :utf-32be)
+      (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32be)))
+  (with-test ("Errors while decoding and substitution of characters.")
     ;; handling of EOF in the middle of CRLF
     (check (string= #.(string #\Return)
                     (read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf))))
@@ -472,11 +527,7 @@
       (check (string= "a??" (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253)))
       (check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))
       ;; not a valid UTF-8 sequence
-      (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))
-      (check (string= "?" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))
-      ;; UTF-8 can't start neither with #b11111110 nor with #b11111111
-      (check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8)))
-      (check (string= "?" (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
+      (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))))
     (let ((*substitution-char* nil))
       ;; :ASCII doesn't have characters with char codes > 127
       (check (string= "abc" (using-values (#\b #\c)
@@ -490,16 +541,12 @@
                               (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))))
       ;; not a valid UTF-8 sequence
       (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))))
-      (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))))
       ;; UTF-8 can't start neither with #b11111110 nor with #b11111111
       (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8))))
-      (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
       ;; only one byte
       (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le))))
-      (check (string= "" (read-flexi-line* #(#x01) :utf-16le)))
       ;; two bytes, but value of resulting word suggests that another word follows
       (check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le))))
-      (check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le))))
       ;; the second word must fit into the [#xdc00; #xdfff] interval, but it is #xdbff
       (check (string= "T" (using-values (#\T) (read-flexi-line '(#x01 #xd8 #xff #xdb) :utf-16le))))
       (check (string= "T" (using-values (#\T) (read-flexi-line* #(#x01 #xd8 #xff #xdb) :utf-16le))))
@@ -507,11 +554,10 @@
       (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be))))
       (check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be))))
       (check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be))))
-      (check (string= "" (read-flexi-line* #(#x01) :utf-16be)))
-      (check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be))))
       (check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be))))
-      ;; the only case when error is signalled for UTF-32 is at end of file
-      ;; in the middle of 4-byte sequence, both for big and little endian
+      ;; the only case when errors are signalled for UTF-32 is at end
+      ;; of file in the middle of 4-byte sequence, both for big and
+      ;; little endian
       (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01) :utf-32le))))
       (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32le))))
       (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32le))))
@@ -521,17 +567,7 @@
       (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32be))))
       (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32be))))
       (check (string= "aY" (using-values (#\Y)
-                             (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))
-      (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32le))))
-      (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32le))))
-      (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32le))))
-      (check (string= "aY" (using-values (#\Y)
-                             (read-flexi-line* `#(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le))))
-      (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32be))))
-      (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32be))))
-      (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32be))))
-      (check (string= "aY" (using-values (#\Y)
-                             (read-flexi-line* `#(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))))))
+                             (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))))))
 
 (defun unread-char-test ()
   "Tests whether UNREAD-CHAR behaves as expected."
@@ -572,7 +608,7 @@
       (incf no-tests (length read-sequence-test-args-list))
       (dolist (args read-sequence-test-args-list)
         (apply 'sequence-test args)))
-    (incf no-tests)
+    (incf no-tests 3)
     (error-handling-test)
     (incf no-tests)
     (unread-char-test)
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0
                            
                          
                          
                            
    
                          
                        
                    
                    
                        Author: eweitz
Date: Sun May 25 17:36:37 2008
New Revision: 60
Modified:
   branches/edi/encode.lisp
   branches/edi/util.lisp
Log:
Help some Lisps optimize the encoding functions
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Sun May 25 17:36:37 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.22 2008/05/25 20:44:03 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.23 2008/05/25 21:26:12 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -203,65 +203,67 @@
       (octet-writer octet))))
 
 (define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
+  ;; the old version using LDB was more elegant, but some Lisps had
+  ;; trouble optimizing it
   (let ((char-code (char-code char-getter)))
     (tagbody
      (cond ((< char-code #x80)
             (octet-writer char-code)
             (go zero))
            ((< char-code #x800)
-            (octet-writer (logior #b11000000 (ldb (byte 5 6) char-code)))
+            (octet-writer (logior* #b11000000 (ash* char-code -6)))
             (go one))
            ((< char-code #x10000)
-            (octet-writer (logior #b11100000 (ldb (byte 4 12) char-code)))
+            (octet-writer (logior* #b11100000 (ash* char-code -12)))
             (go two))
            (t
-            (octet-writer (logior #b11110000 (ldb (byte 3 18) char-code)))))
-     (octet-writer (logior #b10000000 (ldb (byte 6 12) char-code)))
+            (octet-writer (logior* #b11110000 (ash* char-code -18)))))
+     (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -12))))
      two
-     (octet-writer (logior #b10000000 (ldb (byte 6 6) char-code)))
+     (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -6))))
      one
-     (octet-writer (logior #b10000000 (ldb (byte 6 0) char-code)))
+     (octet-writer (logior* #b10000000 (logand* #b00111111 char-code)))
      zero)))
 
 (define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
   (flet ((write-word (word)
-           (octet-writer (ldb (byte 8 0) word))
-           (octet-writer (ldb (byte 8 8) word))))
+           (octet-writer (logand* #x00ff word))
+           (octet-writer (ash* (logand* #xff00 word) -8))))
     (declare (inline write-word))
     (let ((char-code (char-code char-getter)))
       (declare (type char-code-integer char-code))
       (cond ((< char-code #x10000)
              (write-word char-code))
             (t (decf char-code #x10000)
-               (write-word (logior #xd800 (ldb (byte 10 10) char-code)))
-               (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
+               (write-word (logior* #xd800 (ash* char-code -10)))
+               (write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
 
 (define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
   (flet ((write-word (word)
-           (octet-writer (ldb (byte 8 8) word))
-           (octet-writer (ldb (byte 8 0) word))))
+           (octet-writer (ash* (logand* #xff00 word) -8))
+           (octet-writer (logand* #x00ff word))))
     (declare (inline write-word))
     (let ((char-code (char-code char-getter)))
       (declare (type char-code-integer char-code))
       (cond ((< char-code #x10000)
              (write-word char-code))
             (t (decf char-code #x10000)
-               (write-word (logior #xd800 (ldb (byte 10 10) char-code)))
-               (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
+               (write-word (logior* #xd800 (ash* char-code -10)))
+               (write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
 
 (define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
   (let ((char-code (char-code char-getter)))
-    (octet-writer (ldb (byte 8 0) char-code))
-    (octet-writer (ldb (byte 8 8) char-code))
-    (octet-writer (ldb (byte 8 16) char-code))
-    (octet-writer (ldb (byte 8 24) char-code))))
+    (octet-writer (logand* #x00ff char-code))
+    (octet-writer (logand* #x00ff (ash* char-code -8)))
+    (octet-writer (logand* #x00ff (ash* char-code -16)))
+    (octet-writer (logand* #x00ff (ash* char-code -24)))))
 
 (define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
   (let ((char-code (char-code char-getter)))
-    (octet-writer (ldb (byte 8 24) char-code))
-    (octet-writer (ldb (byte 8 16) char-code))
-    (octet-writer (ldb (byte 8 8) char-code))
-    (octet-writer (ldb (byte 8 0) char-code))))
+    (octet-writer (logand* #x00ff (ash* char-code -24)))
+    (octet-writer (logand* #x00ff (ash* char-code -16)))
+    (octet-writer (logand* #x00ff (ash* char-code -8)))
+    (octet-writer (logand* #x00ff char-code))))
 
 (defmethod char-to-octets ((format flexi-cr-mixin) char writer)
   (declare #.*fixnum-optimize-settings*)
Modified: branches/edi/util.lisp
==============================================================================
--- branches/edi/util.lisp	(original)
+++ branches/edi/util.lisp	Sun May 25 17:36:37 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.23 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.24 2008/05/25 21:26:12 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -192,4 +192,16 @@
   "Tries to `rewind' the \(binary) stream STREAM by OCTETS octets.
 Returns a true value if it succeeds."
   (when-let (position (file-position stream))
-    (file-position stream (- position octets))))
\ No newline at end of file
+    (file-position stream (- position octets))))
+
+(defmacro logand* (x y)
+  "Solely for optimization purposes.  Some Lisps need it, some don't."
+  `(the fixnum (logand ,x ,y)))
+
+(defmacro logior* (x y)
+  "Solely for optimization purposes.  Some Lisps need it, some don't."
+  `(the fixnum (logior ,x ,y)))
+
+(defmacro ash* (integer count)
+  "Solely for optimization purposes.  Some Lisps need it, some don't."
+  `(the fixnum (ash ,integer ,count)))
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0
                            
                          
                          
                            
    
                          
                        
                    
                    
                        Author: eweitz
Date: Sun May 25 16:45:09 2008
New Revision: 59
Modified:
   branches/edi/decode.lisp
   branches/edi/encode.lisp
Log:
ANSI compliance fix
Tests pass on ClozureCL and AllegroCL now
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Sun May 25 16:45:09 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.25 2008/05/25 20:26:34 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.26 2008/05/25 20:44:03 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -67,7 +67,7 @@
 
 (defmethod octets-to-string* :around (format (list list) start end)
   (declare #.*standard-optimize-settings*)
-  (call-next-method format (coerce list 'vector) start end))
+  (octets-to-string* format (coerce list 'vector) start end))
 
 (defmacro define-sequence-readers ((format-class) &body body)
   "Non-hygienic utility macro which defines methods for READ-SEQUENCE*
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Sun May 25 16:45:09 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.21 2008/05/25 20:26:34 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.22 2008/05/25 20:44:03 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -49,7 +49,7 @@
 
 (defmethod string-to-octets* :around (format (list list) start end)
   (declare #.*standard-optimize-settings*)
-  (call-next-method format (coerce list 'string*) start end))
+  (string-to-octets* format (coerce list 'string*) start end))
 
 (defmacro define-sequence-writers ((format-class) &body body)
   "Non-hygienic utility macro which defines methods for
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0
                            
                          
                          
                            
    
                          
                        
                    
                    
                        Author: eweitz
Date: Sun May 25 16:28:25 2008
New Revision: 58
Modified:
   branches/edi/decode.lisp
   branches/edi/doc/index.html
   branches/edi/encode.lisp
   branches/edi/input.lisp
   branches/edi/length.lisp
   branches/edi/mapping.lisp
   branches/edi/strings.lisp
Log:
Optimized the other direction as well
Passes tests on LispWorks
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.25 2008/05/25 20:26:34 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -60,26 +60,217 @@
 The special variable *CURRENT-UNREADER* must be bound correctly
 whenever this function is called."))
 
-(defmethod octets-to-char-code ((format flexi-latin-1-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
-  (funcall reader))
+(defgeneric octets-to-string* (format sequence start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "A generic function which dispatches on the external
+format and does the real work for OCTETS-TO-STRING."))
 
-(defmethod octets-to-char-code ((format flexi-ascii-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
-  (when-let (octet (funcall reader))
+(defmethod octets-to-string* :around (format (list list) start end)
+  (declare #.*standard-optimize-settings*)
+  (call-next-method format (coerce list 'vector) start end))
+
+(defmacro define-sequence-readers ((format-class) &body body)
+  "Non-hygienic utility macro which defines methods for READ-SEQUENCE*
+and OCTETS-TO-STRING* for the class FORMAT-CLASS.  BODY is described
+in the docstring of DEFINE-CHAR-ENCODERS but can additionally contain
+a form (UNGET <form>) which has to be replaced by the correct code to
+`unread' the octets for the character designated by <form>."
+  (let* ((body `((block char-decoder
+                   (locally
+                     (declare #.*fixnum-optimize-settings*)
+                     ,@body)))))
+    `(progn
+       (defmethod read-sequence* ((format ,format-class) flexi-input-stream sequence start end)
+         (with-accessors ((position flexi-stream-position)
+                          (bound flexi-stream-bound)
+                          (octet-stack flexi-stream-octet-stack)
+                          (last-octet flexi-stream-last-octet)
+                          (last-char-code flexi-stream-last-char-code)
+                          (stream flexi-stream-stream))
+             flexi-input-stream
+           (let* (buffer
+                  (buffer-pos 0)
+                  (buffer-end 0)
+                  (index start)
+                  ;; whether we will later be able to rewind the stream if
+                  ;; needed (to get rid of unused octets in the buffer)
+                  (can-rewind-p (maybe-rewind stream 0))
+                  (factor (encoding-factor format))
+                  (integer-factor (floor factor))
+                  ;; it's an interesting question whether it makes sense
+                  ;; performance-wise to make RESERVE significantly bigger
+                  ;; (and thus put potentially a lot more octets into
+                  ;; OCTET-STACK), especially for UTF-8
+                  (reserve (cond ((not (floatp factor)) 0)
+                                 ((not can-rewind-p) (* 2 integer-factor))
+                                 (t (ceiling (* (- factor integer-factor) (- end start)))))))
+             (declare (fixnum buffer-pos buffer-end index integer-factor reserve)
+                      (boolean can-rewind-p))
+             (flet ((compute-fill-amount ()
+                      "Computes the amount of octets we can savely read into
+the buffer without violating the stream's bound \(if there is one) and
+without potentially reading much more than we need \(unless we can
+rewind afterwards)."
+                      (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor
+                                                                        (the fixnum (- end index))))
+                                                         reserve))
+                                          +buffer-size+)))
+                        (cond (bound (min minimum (- bound position)))
+                              (t minimum))))
+                    (fill-buffer (end)
+                      "Tries to fill the buffer from BUFFER-POS to END and
+returns NIL if the buffer doesn't contain any new data."
+                      ;; put data from octet stack into buffer if there is any
+                      (loop
+                       (when (>= buffer-pos end)
+                         (return))
+                       (let ((next-octet (pop octet-stack)))
+                         (cond (next-octet
+                                (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet))
+                                (incf buffer-pos))
+                               (t (return)))))
+                      (setq buffer-end (read-sequence buffer stream
+                                                      :start buffer-pos
+                                                      :end end))
+                      ;; BUFFER-POS is only greater than zero if the buffer
+                      ;; already contains unread data from the octet stack
+                      ;; (see below), so we test for ZEROP here and do /not/
+                      ;; compare with BUFFER-POS
+                      (unless (zerop buffer-end)
+                        (incf position buffer-end))))
+               (let ((minimum (compute-fill-amount)))
+                 (declare (fixnum minimum))
+                 (setq buffer (make-octet-buffer minimum))
+                 ;; fill buffer for the first time or return immediately if
+                 ;; we don't succeed
+                 (unless (fill-buffer minimum)
+                   (return-from read-sequence* start)))
+               (setq buffer-pos 0)
+               (macrolet ((iterate (set-place)
+                            "A very unhygienic macro to implement the
+actual iteration through the sequence including housekeeping for the
+flexi stream.  SET-PLACE is the place \(using the index INDEX) used to
+access the sequence."
+                            `(flet ((leave ()
+                                      "This is the function used to
+abort the LOOP iteration below."
+                                      (when (> index start)
+                                        (setq last-octet nil
+                                              last-char-code ,(sublis '((index . (1- index))) set-place)))
+                                      (return-from read-sequence* index)))
+                               (loop
+                                (when (>= index end)
+                                  ;; check if there are octets in the
+                                  ;; buffer we didn't use - see
+                                  ;; COMPUTE-FILL-AMOUNT above
+                                  (let ((rest (- buffer-end buffer-pos)))
+                                    (when (plusp rest)
+                                      (or (and can-rewind-p
+                                               (maybe-rewind stream rest))
+                                          (loop
+                                           (when (>= buffer-pos buffer-end)
+                                             (return))
+                                           (decf buffer-end)
+                                           (push (aref (the (array octet *) buffer) buffer-end)
+                                                 octet-stack)))))
+                                  (leave))
+                                (let ((next-char-code
+                                       (progn (symbol-macrolet
+                                                  ((octet-getter
+                                                    ;; this is the code to retrieve the next octet (or
+                                                    ;; NIL) and to fill the buffer if needed
+                                                    (block next-octet
+                                                      (when (>= buffer-pos buffer-end)
+                                                        (setq buffer-pos 0)
+                                                        (unless (fill-buffer (compute-fill-amount))
+                                                          (return-from next-octet)))
+                                                      (prog1
+                                                          (aref (the (array octet *) buffer) buffer-pos)
+                                                        (incf buffer-pos)))))
+                                                (macrolet ((unget (form)
+                                                             `(unread-char% ,form flexi-input-stream)))
+                                                  ,',@body)))))
+                                  (unless next-char-code
+                                    (leave))
+                                  (setf ,set-place (code-char next-char-code))
+                                  (incf index))))))
+                 (etypecase sequence
+                   (string (iterate (char sequence index)))
+                   (array (iterate (aref sequence index)))
+                   (list (iterate (nth index sequence)))))))))
+       (defmethod octets-to-string* ((format ,format-class) sequence start end)
+         (declare #.*standard-optimize-settings*)
+         (declare (fixnum start end))
+         (let* ((i start)
+                (string-length (compute-number-of-chars format sequence start end nil))
+                (string (make-array string-length :element-type 'char*)))
+           (declare (fixnum i string-length))
+           (loop for j of-type fixnum from 0 below string-length
+                 do (setf (schar string j)
+                          (code-char (macrolet ((unget (form)
+                                                  `(decf i (character-length format ,form))))
+                                       (symbol-macrolet ((octet-getter (and (< i end)
+                                                                            (prog1
+                                                                                (aref sequence i)
+                                                                              (incf i)))))
+                                         ,@body))))
+                 finally (return string)))))))
+
+(defmacro define-char-decoders ((lf-format-class cr-format-class crlf-format-class) &body body)
+  "Non-hygienic utility macro which defines several decoding-related
+methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
+CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
+encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
+similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
+BODY is a code template for the code to read octets and return one
+character.  BODY must contain a symbol OCTET-GETTER representing the
+form which is used to obtain the next octet."
+  `(progn
+     (defmethod octets-to-char-code ((format ,lf-format-class) reader)
+       (declare #.*fixnum-optimize-settings*)
+       (declare (function reader))
+       (symbol-macrolet ((octet-getter (funcall reader)))
+         ,@(sublis '((char-decoder . octets-to-char-code))
+                   body)))
+     (define-sequence-readers (,lf-format-class) ,@body)
+     (define-sequence-readers (,cr-format-class)
+       ,(with-unique-names (char-code)
+          `(let ((,char-code (progn ,@body)))
+             (case ,char-code
+               (#.+cr+ #.(char-code #\Newline))
+               (otherwise ,char-code)))))
+     (define-sequence-readers  (,crlf-format-class)
+       ,(with-unique-names (char-code next-char-code get-char-code)
+          `(flet ((,get-char-code () ,@body))
+             (let ((,char-code (,get-char-code)))
+               (case ,char-code
+                 (#.+cr+
+                  (let ((,next-char-code (,get-char-code)))
+                    (case ,next-char-code
+                      (#.+lf+ #.(char-code #\Newline))
+                      ;; we saw a CR but no LF afterwards, but then the data
+                      ;; ended, so we just return #\Return
+                      ((nil) +cr+)
+                      ;; if the character we peeked at wasn't a
+                      ;; linefeed character we unread its constituents
+                      (otherwise (unget (code-char ,next-char-code))
+                                 ,char-code))))
+                 (otherwise ,char-code))))))))
+
+(define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
+  octet-getter)
+
+(define-char-decoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
+  (when-let (octet octet-getter)
     (if (> (the octet octet) 127)
       (recover-from-encoding-error format
                                    "No character which corresponds to octet #x~X." octet)
       octet)))
 
-(defmethod octets-to-char-code ((format flexi-8-bit-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
   (with-accessors ((decoding-table external-format-decoding-table))
       format
-    (when-let (octet (funcall reader))
+    (when-let (octet octet-getter)
       (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table)
                              (the octet octet))))
         (if (or (null char-code)
@@ -88,19 +279,17 @@
                                        "No character which corresponds to octet #x~X." octet)
           char-code)))))
 
-(defmethod octets-to-char-code ((format flexi-utf-8-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))  
+(define-char-decoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-8 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (let ((octet (read-next-byte)))
         (declare (type octet octet))
@@ -113,11 +302,7 @@
                    (values (logand octet #b00001111) 2))
                   ((= #b11110000 (logand octet #b11111000))
                    (values (logand octet #b00000111) 3))
-                  ((= #b11111000 (logand octet #b11111100))
-                   (values (logand octet #b00000011) 4))
-                  ((= #b11111100 (logand octet #b11111110))
-                   (values (logand octet #b00000001) 5))
-                  (t (return-from octets-to-char-code
+                  (t (return-from char-decoder
                        (recover-from-encoding-error format
                                                     "Unexpected value #x~X at start of UTF-8 sequence."
                                                     octet))))
@@ -130,24 +315,22 @@
                 repeat count
                 for octet of-type octet = (read-next-byte)
                 unless (= #b10000000 (logand octet #b11000000))
-                do (return-from octets-to-char-code
+                do (return-from char-decoder
                      (recover-from-encoding-error format
                                                   "Unexpected value #x~X in UTF-8 sequence." octet))
                 finally (return result)))))))
 
-(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-16 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (flet ((read-next-word ()
                (+ (the octet (read-next-byte))
@@ -159,7 +342,7 @@
                  (let ((next-word (read-next-word)))
                    (declare (type (unsigned-byte 16) next-word))
                    (unless (<= #xdc00 next-word #xdfff)
-                     (return-from octets-to-char-code
+                     (return-from char-decoder
                        (recover-from-encoding-error format
                                                     "Unexpected UTF-16 word #x~X following #x~X."
                                                     next-word word)))
@@ -168,19 +351,17 @@
                       #x10000)))
                 (t word)))))))
 
-(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-16 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (flet ((read-next-word ()
                (+ (ash (the octet (read-next-byte)) 8)
@@ -192,7 +373,7 @@
                  (let ((next-word (read-next-word)))
                    (declare (type (unsigned-byte 16) next-word))
                    (unless (<= #xdc00 next-word #xdfff)
-                     (return-from octets-to-char-code
+                     (return-from char-decoder
                        (recover-from-encoding-error format
                                                     "Unexpected UTF-16 word #x~X following #x~X."
                                                     next-word word)))
@@ -201,37 +382,33 @@
                       #x10000)))
                 (t word)))))))
 
-(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-32 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (loop for count of-type fixnum from 0 to 24 by 8
             for octet of-type octet = (read-next-byte)
             sum (ash octet count)))))
 
-(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-32 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (loop for count of-type fixnum from 24 downto 0 by 8
             for octet of-type octet = (read-next-byte)
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html	(original)
+++ branches/edi/doc/index.html	Sun May 25 16:28:25 2008
@@ -996,7 +996,7 @@
 
 <h4><a name="strings" class=none>Strings</a></h4>
 
-This section collects a few convenience functions for strings conversions:
+This section collects a few convenience functions for strings conversions.
 
 <p><br>[Function]
 <br><a class=none name="string-to-octets"><b>string-to-octets</b> <i>string <tt>&key</tt> external-format start end</i> => <i>vector</i></a>
@@ -1009,7 +1009,9 @@
 <code><i>start</i></code> and <code><i>end</i></code>
 are <code>0</code> and the length of the string.  The default
 for <code><i>external-format</i></code> is <code>:LATIN1</code>.
-
+<p>
+In spite of the name, <code><i>string</i></code> can be any sequence of characters, but
+the function is optimized for strings.
 </blockquote>
 
 <p><br>[Function]
@@ -1023,6 +1025,11 @@
 <code><i>start</i></code> and <code><i>end</i></code>
 are <code>0</code> and the length of the sequence.  The default
 for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+This function is optimized for the case
+of <code><i>sequence</i></code> being
+a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>.
+Don't use lists if you are in hurry.
 </blockquote>
 
 <p><br>[Function]
@@ -1030,14 +1037,17 @@
 
 <blockquote><br>
 
-Returns the length of the substring of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
+Returns the length of the subsequence of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
 <a href="#octet">octets</a> if encoded using
 the <a href="#external-formats">external format</a> designated
 by <code><i>external-format</i></code>.
 The defaults for
 <code><i>start</i></code> and <code><i>end</i></code>
-are <code>0</code> and the length of the string.  The default
+are <code>0</code> and the length of <code><i>string</i></code>.  The default
 for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+In spite of the name, <code><i>string</i></code> can be any sequence of characters, but
+the function is optimized for strings.
 </blockquote>
 
 <p><br>[Function]
@@ -1054,6 +1064,11 @@
 <code><i>start</i></code> and <code><i>end</i></code>
 are <code>0</code> and the length of the sequence.  The default
 for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+This function is optimized for the case
+of <code><i>sequence</i></code> being
+a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>.
+Don't use lists if you are in hurry.
 </blockquote>
 
 <br> <br><h3><a class=none name="position">File positions</a></h3>
@@ -1095,7 +1110,7 @@
 his work on making FLEXI-STREAMS faster.
 
 <p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.114 2008/05/25 03:08:01 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $
 <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
 
 </body>
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.21 2008/05/25 20:26:34 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -47,130 +47,140 @@
   (:documentation "A generic function which dispatches on the external
 format and does the real work for STRING-TO-OCTETS."))
 
+(defmethod string-to-octets* :around (format (list list) start end)
+  (declare #.*standard-optimize-settings*)
+  (call-next-method format (coerce list 'string*) start end))
+
 (defmacro define-sequence-writers ((format-class) &body body)
-  "Utility macro which defines methods for WRITE-SEQUENCE* and
-STRING-TO-OCTET* for the class FORMAT-CLASS.  For BODY see the
-docstring of DEFINE-CHAR-ENCODERS."
-  `(progn
-     (defmethod write-sequence* ((format ,format-class) stream sequence start end)
-       (declare #.*standard-optimize-settings*)
-       (declare (fixnum start end))
-       (with-accessors ((column flexi-stream-column))
-           stream
-         (let* ((octet-seen-p nil)
-                (buffer-pos 0)
-                ;; estimate should be good enough...
-                (factor (encoding-factor format))
-                ;; we don't want arbitrarily large buffer, do we?
-                (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
-                (buffer (make-octet-buffer buffer-size)))
-           (declare (fixnum buffer-pos buffer-size)
-                    (boolean octet-seen-p)
-                    (type (array octet *) buffer))
-           (macrolet ((octet-writer (form)
-                        `(write-octet ,form)))
-             (labels ((flush-buffer ()
-                        "Sends all octets in BUFFER to the underlying stream."
-                        (write-sequence buffer stream :end buffer-pos)
-                        (setq buffer-pos 0))
-                      (write-octet (octet)
-                        "Adds one octet to the buffer and flushes it if necessary."
-                        (declare (type octet octet))
-                        (when (>= buffer-pos buffer-size)
-                          (flush-buffer))
-                        (setf (aref buffer buffer-pos) octet)
-                        (incf buffer-pos))
-                      (write-object (object)
-                        "Dispatches to WRITE-OCTET or WRITE-CHARACTER
-depending on the type of OBJECT."
-                        (etypecase object
-                          (octet (setq octet-seen-p t)
-                                 (write-octet object))
-                          (character (symbol-macrolet ((char-getter object))
-                                       ,@body)))))
-               (macrolet ((iterate (&body output-forms)
-                            "An unhygienic macro to implement the actual
-iteration through SEQUENCE.  OUTPUT-FORM is the form to retrieve one
-sequence element and put its octet representation into the buffer."
-                            `(loop for index of-type fixnum from start below end
-                                   do (progn ,@output-forms)
-                                   finally (when (plusp buffer-pos)
-                                             (flush-buffer)))))
-                 (etypecase sequence
-                   (string (iterate
-                            (symbol-macrolet ((char-getter (char sequence index)))
-                              ,@body)))
-                   (array (iterate
-                           (symbol-macrolet ((char-getter (aref sequence index)))
-                             ,@body)))
-                   (list  (iterate (write-object (nth index sequence))))))
-               ;; update the column slot, setting it to NIL if we sent
-               ;; octets
-               (setq column
-                     (cond (octet-seen-p nil)
-                           (t (let ((last-newline-pos (position #\Newline sequence
-                                                                :test #'char=
-                                                                :start start
-                                                                :end end
-                                                                :from-end t)))
-                                (cond (last-newline-pos (- end last-newline-pos 1))
-                                      (column (+ column (- end start)))))))))))))  
-     (defmethod string-to-octets* ((format ,format-class) string start end)
-       (declare #.*standard-optimize-settings*)
-       (declare (fixnum start end) (string string))
-       (let ((octets (make-array (compute-number-of-octets format string start end)
-                                 :element-type 'octet))
-             (j 0))
-         (declare (fixnum j))
-         (loop for i of-type fixnum from start below end do
-               (macrolet ((octet-writer (form)
-                            `(progn
-                               (setf (aref (the (array octet *) octets) j) ,form)
-                               (incf j))))
-                 (symbol-macrolet ((char-getter (char string i)))
-                   (progn ,@body))))
-         octets))))
-
-;; char-getter can be called more than once - no side effects
-(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body)
-  "Utility macro which defines several encoding-related methods for
-the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where
-it is assumed that CR-FORMAT-CLASS is the same encoding as
-FORMAT-CLASS but with CR line endings and similar for
-CRLF-FORMAT-CLASS.  BODY is a code template for the code to convert
-one character to octets.  BODY must contain a symbol CHAR-GETTER
-representing the form which is used to obtain the character and a
-forms like \(OCTET-WRITE <thing>) to write the octet <thing>.  The
-CHAR-GETTER form might be called more than once."
+  "Non-hygienic utility macro which defines methods for
+WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS.  For
+BODY see the docstring of DEFINE-CHAR-ENCODERS."
   (let ((body `((locally
                   (declare #.*fixnum-optimize-settings*)
                   ,@body))))
     `(progn
-       (defmethod char-to-octets ((format ,format-class) char writer)
-         (declare (character char) (function writer))
-         (symbol-macrolet ((char-getter char))
-           (macrolet ((octet-writer (form)
-                        `(funcall writer ,form)))
-             ,@body)))
-       (define-sequence-writers (,format-class) ,@body)
-       (define-sequence-writers (,cr-format-class)
-         ,@(sublis `((char-getter . ,(with-unique-names (char)
-                                       `(let ((,char char-getter))
-                                          (declare (character ,char))
-                                          (if (char= ,char #\Newline)
-                                            #\Return
-                                            ,char)))))
-                   body))
-       (define-sequence-writers (,crlf-format-class)
-         ,(with-unique-names (char write-char)
-            `(flet ((,write-char (,char)
-                      ,@(sublis `((char-getter . ,char)) body)))
-               (let ((,char char-getter))
-                 (declare (character ,char))
-                 (cond ((char= ,char #\Newline)
-                        (,write-char #\Return)
-                        (,write-char #\Newline))
-                       (t (,write-char ,char))))))))))
+       (defmethod string-to-octets* ((format ,format-class) string start end)
+         (declare #.*standard-optimize-settings*)
+         (declare (fixnum start end) (string string))
+         (let ((octets (make-array (compute-number-of-octets format string start end)
+                                   :element-type 'octet))
+               (j 0))
+           (declare (fixnum j))
+           (loop for i of-type fixnum from start below end do
+                 (macrolet ((octet-writer (form)
+                              `(progn
+                                 (setf (aref (the (array octet *) octets) j) ,form)
+                                 (incf j))))
+                   (symbol-macrolet ((char-getter (char string i)))
+                     (progn ,@body))))
+           octets)) 
+       (defmethod write-sequence* ((format ,format-class) stream sequence start end)
+         (declare #.*standard-optimize-settings*)
+         (declare (fixnum start end))
+         (with-accessors ((column flexi-stream-column))
+             stream
+           (let* ((octet-seen-p nil)
+                  (buffer-pos 0)
+                  ;; estimate should be good enough...
+                  (factor (encoding-factor format))
+                  ;; we don't want arbitrarily large buffer, do we?
+                  (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
+                  (buffer (make-octet-buffer buffer-size)))
+             (declare (fixnum buffer-pos buffer-size)
+                      (boolean octet-seen-p)
+                      (type (array octet *) buffer))
+             (macrolet ((octet-writer (form)
+                          `(write-octet ,form)))
+               (labels ((flush-buffer ()
+                          "Sends all octets in BUFFER to the underlying stream."
+                          (write-sequence buffer stream :end buffer-pos)
+                          (setq buffer-pos 0))
+                        (write-octet (octet)
+                          "Adds one octet to the buffer and flushes it if necessary."
+                          (declare (type octet octet))
+                          (when (>= buffer-pos buffer-size)
+                            (flush-buffer))
+                          (setf (aref buffer buffer-pos) octet)
+                          (incf buffer-pos))
+                        (write-object (object)
+                          "Dispatches to WRITE-OCTET or WRITE-CHARACTER
+depending on the type of OBJECT."
+                          (etypecase object
+                            (octet (setq octet-seen-p t)
+                                   (write-octet object))
+                            (character (symbol-macrolet ((char-getter object))
+                                         ,@body)))))
+                 (macrolet ((iterate (&body output-forms)
+                              "An unhygienic macro to implement the actual
+iteration through SEQUENCE.  OUTPUT-FORM is the form to retrieve one
+sequence element and put its octet representation into the buffer."
+                              `(loop for index of-type fixnum from start below end
+                                     do (progn ,@output-forms)
+                                     finally (when (plusp buffer-pos)
+                                               (flush-buffer)))))
+                   (etypecase sequence
+                     (string (iterate
+                              (symbol-macrolet ((char-getter (char sequence index)))
+                                ,@body)))
+                     (array (iterate
+                             (symbol-macrolet ((char-getter (aref sequence index)))
+                               ,@body)))
+                     (list  (iterate (write-object (nth index sequence))))))
+                 ;; update the column slot, setting it to NIL if we sent
+                 ;; octets
+                 (setq column
+                       (cond (octet-seen-p nil)
+                             (t (let ((last-newline-pos (position #\Newline sequence
+                                                                  :test #'char=
+                                                                  :start start
+                                                                  :end end
+                                                                  :from-end t)))
+                                  (cond (last-newline-pos (- end last-newline-pos 1))
+                                        (column (+ column (- end start))))))))))))))))
+
+(defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body)
+  "Non-hygienic utility macro which defines several encoding-related
+methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
+CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
+encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
+similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
+BODY is a code template for the code to convert one character to
+octets.  BODY must contain a symbol CHAR-GETTER representing the form
+which is used to obtain the character and a forms like \(OCTET-WRITE
+<thing>) to write the octet <thing>.  The CHAR-GETTER form might be
+called more than once."
+  `(progn
+     (defmethod char-to-octets ((format ,lf-format-class) char writer)
+       (declare #.*fixnum-optimize-settings*)
+       (declare (character char) (function writer))
+       (symbol-macrolet ((char-getter char))
+         (macrolet ((octet-writer (form)
+                      `(funcall writer ,form)))
+           ,@body)))
+     (define-sequence-writers (,lf-format-class) ,@body)
+     (define-sequence-writers (,cr-format-class)
+       ;; modify the body so that the getter replaces a #\Newline
+       ;; with a #\Return
+       ,@(sublis `((char-getter . ,(with-unique-names (char)
+                                     `(let ((,char char-getter))
+                                        (declare (character ,char))
+                                        (if (char= ,char #\Newline)
+                                          #\Return
+                                          ,char)))))
+                 body))
+     (define-sequence-writers (,crlf-format-class)
+       ;; modify the body so that we potentially write octets for
+       ;; two characters (#\Return and #\Linefeed) - the original
+       ;; body is wrapped with the WRITE-CHAR local function
+       ,(with-unique-names (char write-char)
+          `(flet ((,write-char (,char)
+                    ,@(sublis `((char-getter . ,char)) body)))
+             (let ((,char char-getter))
+               (declare (character ,char))
+               (cond ((char= ,char #\Newline)
+                      (,write-char #\Return)
+                      (,write-char #\Linefeed))
+                     (t (,write-char ,char)))))))))
 
 (define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format  flexi-crlf-latin-1-format)
   (let ((octet (char-code char-getter)))
Modified: branches/edi/input.lisp
==============================================================================
--- branches/edi/input.lisp	(original)
+++ branches/edi/input.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.77 2008/05/25 03:34:55 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.78 2008/05/25 19:25:44 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -201,9 +201,7 @@
 others - see for example FLEXI-STREAMS-TEST::SEQUENCE-TEST."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
-  (with-accessors ((position flexi-stream-position)
-                   (bound flexi-stream-bound)
-                   (octet-stack flexi-stream-octet-stack)
+  (with-accessors ((octet-stack flexi-stream-octet-stack)
                    (external-format flexi-stream-external-format)
                    (last-octet flexi-stream-last-octet)
                    (last-char-code flexi-stream-last-char-code)
@@ -233,116 +231,8 @@
           (setq last-char-code nil
                 last-octet (elt sequence (1- index))))
         (return-from stream-read-sequence index)))
-    (let* (buffer
-           (buffer-pos 0)
-           (buffer-end 0)
-           (index start)
-           ;; whether we will later be able to rewind the stream if
-           ;; needed (to get rid of unused octets in the buffer)
-           (can-rewind-p (maybe-rewind stream 0))
-           (factor (encoding-factor external-format))
-           (integer-factor (floor factor))
-           ;; it's an interesting question whether it makes sense
-           ;; performance-wise to make RESERVE significantly bigger
-           ;; (and thus put potentially a lot more octets into
-           ;; OCTET-STACK), especially for UTF-8
-           (reserve (cond ((not (floatp factor)) 0)
-                          ((not can-rewind-p) (* 2 integer-factor))
-                          (t (ceiling (* (- factor integer-factor) (- end start)))))))
-      (declare (fixnum buffer-pos buffer-end index integer-factor reserve)
-               (boolean can-rewind-p))
-      (flet ((compute-fill-amount ()
-               "Computes the amount of octets we can savely read into
-the buffer without violating the stream's bound \(if there is one) and
-without potentially reading much more than we need \(unless we can
-rewind afterwards)."
-               (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor
-                                                                 (the fixnum (- end index))))
-                                                  reserve))
-                                   +buffer-size+)))
-                 (cond (bound (min minimum (- bound position)))
-                       (t minimum))))
-             (fill-buffer (end)
-               "Tries to fill the buffer from BUFFER-POS to END and
-returns NIL if the buffer doesn't contain any new data."
-               ;; put data from octet stack into buffer if there is any
-               (loop
-                (when (>= buffer-pos end)
-                  (return))
-                (let ((next-octet (pop octet-stack)))
-                  (cond (next-octet
-                         (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet))
-                         (incf buffer-pos))
-                        (t (return)))))
-               (setq buffer-end (read-sequence buffer stream
-                                               :start buffer-pos
-                                               :end end))
-               ;; BUFFER-POS is only greater than zero if the buffer
-               ;; already contains unread data from the octet stack
-               ;; (see below), so we test for ZEROP here and do /not/
-               ;; compare with BUFFER-POS
-               (unless (zerop buffer-end)
-                 (incf position buffer-end))))
-        (let ((minimum (compute-fill-amount)))
-          (declare (fixnum minimum))
-          (setq buffer (make-octet-buffer minimum))
-          ;; fill buffer for the first time or return immediately if
-          ;; we don't succeed
-          (unless (fill-buffer minimum)
-            (return-from stream-read-sequence start)))
-        (setq buffer-pos 0)
-        (flet ((next-octet ()
-                 "Returns the next octet from the buffer and fills it
-if it is exhausted.  Returns NIL if there's no more data on the
-stream."
-                 (when (>= buffer-pos buffer-end)
-                   (setq buffer-pos 0)
-                   (unless (fill-buffer (compute-fill-amount))
-                     (return-from next-octet)))
-                 (prog1
-                     (aref (the (array octet *) buffer) buffer-pos)
-                   (incf buffer-pos)))
-               (unreader (char)
-                 (unread-char% char flexi-input-stream)))
-          (declare (dynamic-extent (function next-octet) (function unreader)))
-          (let ((*current-unreader* #'unreader))
-            (macrolet ((iterate (set-place)
-                         "A very unhygienic macro to implement the
-actual iteration through the sequence including housekeeping for the
-flexi stream.  SET-PLACE is the place \(using the index INDEX) used to
-access the sequence."
-                         `(flet ((leave ()
-                                   "This is the function used to abort
-the LOOP iteration below."
-                                   (when (> index start)
-                                     (setq last-octet nil
-                                           last-char-code ,(sublis '((index . (1- index))) set-place)))
-                                   (return-from stream-read-sequence index)))
-                            (loop
-                             (when (>= index end)
-                               ;; check if there are octets in the
-                               ;; buffer we didn't use - see
-                               ;; COMPUTE-FILL-AMOUNT above
-                               (let ((rest (- buffer-end buffer-pos)))
-                                 (when (plusp rest)
-                                   (or (and can-rewind-p
-                                            (maybe-rewind stream rest))
-                                       (loop
-                                        (when (>= buffer-pos buffer-end)
-                                          (return))
-                                        (decf buffer-end)
-                                        (push (aref (the (array octet *) buffer) buffer-end)
-                                              octet-stack)))))
-                               (leave))
-                             (let ((next-char-code (octets-to-char-code external-format #'next-octet)))
-                               (unless next-char-code
-                                 (leave))
-                               (setf ,set-place (code-char next-char-code))
-                               (incf index))))))
-              (etypecase sequence
-                (string (iterate (char sequence index)))
-                (array (iterate (aref sequence index)))
-                (list (iterate (nth index sequence)))))))))))
+    ;; otherwise hand over to the external format to do the work
+    (read-sequence* external-format flexi-input-stream sequence start end)))
 
 (defmethod stream-unread-char ((stream flexi-input-stream) char)
   "Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM.
Modified: branches/edi/length.lisp
==============================================================================
--- branches/edi/length.lisp	(original)
+++ branches/edi/length.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -115,7 +115,7 @@
   ;; formats with CRLF line endings have their own specialized methods
   ;; below
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (declare (ignore warnp))
   (let ((i start)
         (length (- end start)))
@@ -132,7 +132,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
@@ -152,7 +152,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start)
         (last-octet 0))
@@ -175,7 +175,7 @@
 
 (defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (declare (ignore sequence))
   (when (and warnp (oddp (- end start)))
     (signal-encoding-warning format "~A octet~:P cannot be decoded ~
@@ -203,7 +203,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
@@ -222,7 +222,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start)
         (last-octet 0))
@@ -248,7 +248,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start)
         (last-octet 0))
@@ -290,7 +290,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (declare (ignore warnp))
   (let ((i start)
         (length (ceiling (- end start) 4)))
@@ -308,7 +308,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (declare (ignore warnp))
   (let ((i start)
         (length (ceiling (- end start) 4)))
@@ -330,22 +330,26 @@
 encode the sequence of characters in SEQUENCE from START to END using
 the external format FORMAT."))
 
-(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+(defmethod compute-number-of-octets :around (format (list list) start end)
+  (declare #.*standard-optimize-settings*)
+  (call-next-method format (coerce list 'string*) start end))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) string start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
-  (declare (ignore sequence))  
+  (declare (ignore string))  
   (- end start))
 
-(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((< char-code #x80) 1)
                                ((< char-code #x800) 2)
                                ((< char-code #x10000) 3)
@@ -355,16 +359,16 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
                                ((< char-code #x80) 1)
                                ((< char-code #x800) 2)
@@ -375,16 +379,16 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((< char-code #x10000) 2)
                                (t 4))))
        (declare (fixnum char-length) (type char-code-integer char-code))
@@ -392,16 +396,16 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
                                ((< char-code #x10000) 2)
                                (t 4))))
@@ -410,16 +414,16 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
                                ((< char-code #x10000) 2)
                                (t 4))))
@@ -428,17 +432,39 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) string start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
-  (declare (ignore sequence))
+  (declare (ignore string))
   (* 4 (- end start)))
 
-(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (+ (call-next-method)
      (* (case (external-format-name format)
           (:utf-32 4)
           (otherwise 1))
-        (count #\Newline sequence :start start :end end :test #'char=))))
\ No newline at end of file
+        (count #\Newline string :start start :end end :test #'char=))))
+
+(defgeneric character-length (format char)
+  (declare #.*fixnum-optimize-settings*)
+  (:documentation "Returns the number of octets needed to encode the
+single character CHAR.")
+  (:method (format char)
+   (compute-number-of-octets format (string char) 0 1)))
+
+(defmethod character-length :around ((format flexi-crlf-mixin) (char (eql #\Newline)))
+  (declare #.*fixnum-optimize-settings*)
+  (+ (call-next-method format +cr+)
+     (call-next-method format +lf+)))
+
+(defmethod character-length ((format flexi-8-bit-format) char)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (ignore char))
+  1)
+
+(defmethod character-length ((format flexi-utf-32-format) char)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (ignore char))
+  4)
\ No newline at end of file
Modified: branches/edi/mapping.lisp
==============================================================================
--- branches/edi/mapping.lisp	(original)
+++ branches/edi/mapping.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.2 2008/05/20 21:15:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.3 2008/05/25 19:07:53 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -39,6 +39,12 @@
   #+:lispworks 'lw:simple-char
   #-:lispworks 'character)
 
+(deftype string* ()
+  "Convenience shortcut to paper over the difference between LispWorks
+and the other Lisps."
+  #+:lispworks 'lw:text-string
+  #-:lispworks 'string)
+
 (deftype char-code-integer ()
   "The subtype of integers which can be returned by the function CHAR-CODE."
   '(integer 0 #.(1- char-code-limit)))
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp	(original)
+++ branches/edi/strings.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.29 2008/05/25 03:34:55 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -34,7 +34,10 @@
                                 (start 0) (end (length string)))
   "Converts the Lisp string STRING from START to END to an array of
 octets corresponding to the external format designated by
-EXTERNAL-FORMAT."
+EXTERNAL-FORMAT.
+
+In spite of the name, STRING can be any sequence of characters, but
+the function is optimized for strings."
   (declare #.*standard-optimize-settings*)
   (declare (string string))
   (setq external-format (maybe-convert-external-format external-format))
@@ -45,51 +48,22 @@
                                   (external-format :latin1)
                                   (start 0) (end (length sequence)))
   "Converts the Lisp sequence SEQUENCE of octets from START to END to
-a string using the external format designated by EXTERNAL-FORMAT."
+a string using the external format designated by EXTERNAL-FORMAT.
+
+This function is optimized for the case of SEQUENCE being a vector.
+Don't use lists if you're in a hurry."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
   (setq external-format (maybe-convert-external-format external-format))
-  (let* ((i start)
-         (reader (etypecase sequence
-                   ((array octet *)
-                    (lambda ()
-                      (and (< i end)
-                           (prog1
-                               (aref (the (array octet *) sequence) i)
-                             (incf i)))))
-                   ((array * *)
-                    (lambda ()
-                      (and (< i end)
-                           (prog1
-                               (aref sequence i)
-                             (incf i)))))
-                   (list
-                    (lambda ()
-                      (and (< i end)
-                           (prog1
-                               (nth i sequence)
-                             (incf i))))))))
-    (declare (fixnum i) (dynamic-extent reader))
-    (labels ((pseudo-writer (octet)
-               (declare (ignore octet))
-               (decf i))
-             (unreader (char)
-               (char-to-octets external-format char #'pseudo-writer)))
-      (declare (dynamic-extent (function pseudo-writer) (function unreader)))
-      (let ((*current-unreader* #'unreader))
-        (flet ((next-char ()
-                 (code-char (octets-to-char-code external-format reader))))
-          (declare (inline next-char))
-          (let* ((string-length (compute-number-of-chars external-format sequence start end nil))
-                 (string (make-array string-length :element-type 'char*)))
-            (declare (fixnum string-length))
-            (loop for j of-type fixnum from 0 below string-length
-                  do (setf (schar string j) (next-char))
-                  finally (return string))))))))
+  ;; the external format knows how to do it...
+  (octets-to-string* external-format sequence start end))
 
 (defun octet-length (string &key (external-format :latin1) (start 0) (end (length string)))
   "Returns the length of the substring of STRING from START to END in
-octets if encoded using the external format EXTERNAL-FORMAT."
+octets if encoded using the external format EXTERNAL-FORMAT.
+
+In spite of the name, STRING can be any sequence of characters, but
+the function is optimized for strings."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end) (string string))
   (setq external-format (maybe-convert-external-format external-format))
@@ -98,7 +72,10 @@
 (defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence)))
   "Kind of the inverse of OCTET-LENGTH.  Returns the length of the
 subsequence \(of octets) of SEQUENCE from START to END in characters
-if decoded using the external format EXTERNAL-FORMAT."
+if decoded using the external format EXTERNAL-FORMAT.
+
+This function is optimized for the case of SEQUENCE being a vector.
+Don't use lists if you're in a hurry."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
   (setq external-format (maybe-convert-external-format external-format))
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0
                            
                          
                          
                            
    
                          
                        
                    
                    
                        Author: eweitz
Date: Sun May 25 08:26:47 2008
New Revision: 57
Added:
   branches/edi/length.lisp   (contents, props changed)
Modified:
   branches/edi/decode.lisp
   branches/edi/encode.lisp
   branches/edi/external-format.lisp
   branches/edi/flexi-streams.asd
Log:
Re-org
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.20 2008/05/25 03:25:30 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -29,256 +29,6 @@
 
 (in-package :flexi-streams)
 
-(defgeneric compute-number-of-chars (format sequence start end warnp)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Computes the exact number of characters required to
-decode the sequence of octets in SEQUENCE from START to END using the
-external format FORMAT.  If WARNP is NIL, warnings will be muffled."))
-
-(defmethod compute-number-of-chars :around (format (list list) start end warnp)
-  (declare #.*standard-optimize-settings*)
-  (call-next-method format (coerce list 'vector) start end warnp))
-
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence warnp))
-  (- end start))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
-  ;; this method only applies to the 8-bit formats as all other
-  ;; formats with CRLF line endings have their own specialized methods
-  ;; below
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore warnp))
-  (let ((i start)
-        (length (- end start)))
-    (declare (fixnum i length))
-    (loop
-     (when (>= i end)
-       (return))
-     (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
-       (unless position
-         (return))
-       (setq i (1+ position))
-       (decf length)))
-    length))
-
-(defgeneric check-end (format start end i warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (:method (format start end i warnp)
-   (declare #.*fixnum-optimize-settings*)
-   (declare (fixnum start end i))
-   (when (and warnp (> i end))
-     (signal-encoding-warning format "These ~A octet~:P can't be ~
-decoded using ~A as the sequence is too short.  ~A octet~:P missing ~
-at then end."
-                              (- end start)
-                              (external-format-name format)
-                              (- i end))))
-  (:method ((format flexi-utf-16-format) start end i warnp)
-   (declare #.*fixnum-optimize-settings*)
-   (declare (fixnum start end i))
-   (declare (ignore i warnp))
-   ;; don't warn twice
-   (when (evenp (- end start))
-     (call-next-method))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((octet (aref sequence i))
-            (length (cond ((not (logbitp 7 octet)) 1)
-                          ((= #b11000000 (logand octet #b11100000)) 2)
-                          ((= #b11100000 (logand octet #b11110000)) 3)
-                          (t 4))))
-       (declare (fixnum length) (type octet octet))
-       (incf sum)
-       (incf i length)))
-    (check-end format start end i warnp)
-    sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start)
-        (last-octet 0))
-    (declare (fixnum i sum) (type octet last-octet))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((octet (aref sequence i))
-            (length (cond ((not (logbitp 7 octet)) 1)
-                          ((= #b11000000 (logand octet #b11100000)) 2)
-                          ((= #b11100000 (logand octet #b11110000)) 3)
-                          (t 4))))
-       (declare (fixnum length) (type octet octet))
-       (unless (and (= octet +lf+) (= last-octet +cr+))
-         (incf sum))
-       (incf i length)
-       (setq last-octet octet)))
-    (check-end format start end i warnp)
-    sum))
-
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence))
-  (when (and warnp (oddp (- end start)))
-    (signal-encoding-warning format "~A octet~:P cannot be decoded ~
-using UTF-16 as ~:*~A is not even."
-                             (- end start))))  
-  
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (decf end 2)
-    (loop
-     (when (> i end)
-       (return))
-     (let* ((high-octet (aref sequence (1+ i)))
-            (length (cond ((<= #xd8 high-octet #xdf) 4)
-                          (t 2))))
-       (declare (fixnum length) (type octet high-octet))
-       (incf sum)
-       (incf i length)))
-    (check-end format start (+ end 2) i warnp)
-    sum))
-
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (decf end 2)
-    (loop
-     (when (> i end)
-       (return))
-     (let* ((high-octet (aref sequence i))
-            (length (cond ((<= #xd8 high-octet #xdf) 4)
-                          (t 2))))
-       (declare (fixnum length) (type octet high-octet))
-       (incf sum)
-       (incf i length)))
-    (check-end format start (+ end 2) i warnp)
-    sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start)
-        (last-octet 0))
-    (declare (fixnum i sum) (type octet last-octet))
-    (decf end 2)
-    (loop
-     (when (> i end)
-       (return))
-     (let* ((high-octet (aref sequence (1+ i)))
-            (length (cond ((<= #xd8 high-octet #xdf) 4)
-                          (t 2))))
-       (declare (fixnum length) (type octet high-octet))
-       (unless (and (zerop high-octet)
-                    (= (the octet (aref sequence i)) +lf+)
-                    (= last-octet +cr+))         
-         (incf sum))
-       (setq last-octet (if (zerop high-octet)
-                          (aref sequence i)
-                          0))
-       (incf i length)))
-    (check-end format start (+ end 2) i warnp)
-    sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start)
-        (last-octet 0))
-    (declare (fixnum i sum) (type octet last-octet))
-    (decf end 2)
-    (loop
-     (when (> i end)
-       (return))
-     (let* ((high-octet (aref sequence i))
-            (length (cond ((<= #xd8 high-octet #xdf) 4)
-                          (t 2))))
-       (declare (fixnum length) (type octet high-octet))
-       (unless (and (zerop high-octet)
-                    (= (the octet (aref sequence (1+ i))) +lf+)
-                    (= last-octet +cr+))
-         (incf sum))
-       (setq last-octet (if (zerop high-octet)
-                          (aref sequence (1+ i))
-                          0))
-       (incf i length)))
-    (check-end format start (+ end 2) i warnp)
-    sum))
-
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence))
-  (let ((length (- end start)))
-    (when (and warnp (plusp (mod length 4)))
-      (signal-encoding-warning format "~A octet~:P cannot be decoded ~
-using UTF-32 as ~:*~A is not a multiple-value of four."
-                               length))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence warnp))
-  (ceiling (- end start) 4))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore warnp))
-  (let ((i start)
-        (length (ceiling (- end start) 4)))
-    (decf end 8)
-    (loop
-     (when (> i end)
-       (return))
-     (cond ((loop for j of-type fixnum from i
-                  for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
-                  always (= octet (aref sequence j)))
-            (decf length)
-            (incf i 8))
-           (t (incf i 4))))
-    length))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore warnp))
-  (let ((i start)
-        (length (ceiling (- end start) 4)))
-    (decf end 8)
-    (loop
-     (when (> i end)
-       (return))
-     (cond ((loop for j of-type fixnum from i
-                  for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
-                  always (= octet (aref sequence j)))
-            (decf length)
-            (incf i 8))
-           (t (incf i 4))))
-    length))
-
 (defun recover-from-encoding-error (external-format format-control &rest format-args)
   "Helper function used by OCTETS-TO-CHAR-CODE below to deal with
 encoding errors.  Checks if *SUBSTITUTION-CHAR* is not NIL and returns
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.17 2008/05/25 03:25:30 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -29,125 +29,6 @@
 
 (in-package :flexi-streams)
 
-(defgeneric compute-number-of-octets (format sequence start end)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Computes the exact number of octets required to
-encode the sequence of characters in SEQUENCE from START to END using
-the external format FORMAT."))
-
-(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence))  
-  (- end start))
-
-(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((< char-code #x80) 1)
-                               ((< char-code #x800) 2)
-                               ((< char-code #x10000) 3)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
-                               ((< char-code #x80) 1)
-                               ((< char-code #x800) 2)
-                               ((< char-code #x10000) 3)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((< char-code #x10000) 2)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
-                               ((< char-code #x10000) 2)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
-                               ((< char-code #x10000) 2)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence))
-  (* 4 (- end start)))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (+ (call-next-method)
-     (* (case (external-format-name format)
-          (:utf-32 4)
-          (otherwise 1))
-        (count #\Newline sequence :start start :end end :test #'char=))))
-
 (defgeneric char-to-octets (format char writer)
   (declare #.*standard-optimize-settings*)
   (:documentation "Converts the character CHAR to a sequence of octets
Modified: branches/edi/external-format.lisp
==============================================================================
--- branches/edi/external-format.lisp	(original)
+++ branches/edi/external-format.lisp	Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.22 2008/05/25 12:26:02 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -387,46 +387,3 @@
 NORMALIZE-EXTERNAL-FORMAT."
   (print-unreadable-object (object stream :type t :identity t)
     (prin1 (normalize-external-format object) stream)))
-
-(defgeneric encoding-factor (format)
-  (:documentation "Given an external format FORMAT, returns a factor
-which denotes the octets to characters ratio to expect when
-encoding/decoding.  If the returned value is an integer, the factor is
-assumed to be exact.  If it is a \(double) float, the factor is
-supposed to be based on heuristics and usually not exact.
-
-This factor is used in string.lisp.")
-  (declare #.*standard-optimize-settings*))
-
-(defmethod encoding-factor ((format flexi-8-bit-format))
-  (declare #.*standard-optimize-settings*)
-  ;; 8-bit encodings map octets to characters in an exact one-to-one
-  ;; fashion
-  1)
-
-(defmethod encoding-factor ((format flexi-utf-8-format))
-  (declare #.*standard-optimize-settings*)
-  ;; UTF-8 characters can be anything from one to six octets, but we
-  ;; assume that the "overhead" is only about 5 percent - this
-  ;; estimate is obviously very much dependant on the content
-  1.05d0)
-
-(defmethod encoding-factor ((format flexi-utf-16-format))
-  (declare #.*standard-optimize-settings*)
-  ;; usually one character maps to two octets, but characters with
-  ;; code points above #x10000 map to four octets - we assume that we
-  ;; usually don't see these characters but of course have to return a
-  ;; float
-  2.0d0)
-
-(defmethod encoding-factor ((format flexi-utf-32-format))
-  (declare #.*standard-optimize-settings*)
-  ;; UTF-32 always matches every character to four octets
-  4)
-
-(defmethod encoding-factor ((format flexi-crlf-mixin))
-  (declare #.*standard-optimize-settings*)
-  ;; if the sequence #\Return #\Linefeed is the line-end marker, this
-  ;; obviously makes encodings potentially longer and definitely makes
-  ;; the estimate unexact
-  (* 1.02d0 (call-next-method)))
Modified: branches/edi/flexi-streams.asd
==============================================================================
--- branches/edi/flexi-streams.asd	(original)
+++ branches/edi/flexi-streams.asd	Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.69 2008/05/23 14:56:46 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -47,6 +47,7 @@
                (:file "util")
                (:file "conditions")
                (:file "external-format")
+               (:file "length")
                (:file "encode")
                (:file "decode")
                (:file "in-memory")
Added: branches/edi/length.lisp
==============================================================================
--- (empty file)
+++ branches/edi/length.lisp	Sun May 25 08:26:47 2008
@@ -0,0 +1,444 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $
+
+;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :flexi-streams)
+
+(defgeneric encoding-factor (format)
+  (:documentation "Given an external format FORMAT, returns a factor
+which denotes the octets to characters ratio to expect when
+encoding/decoding.  If the returned value is an integer, the factor is
+assumed to be exact.  If it is a \(double) float, the factor is
+supposed to be based on heuristics and usually not exact.
+
+This factor is used in string.lisp.")
+  (declare #.*standard-optimize-settings*))
+
+(defmethod encoding-factor ((format flexi-8-bit-format))
+  (declare #.*standard-optimize-settings*)
+  ;; 8-bit encodings map octets to characters in an exact one-to-one
+  ;; fashion
+  1)
+
+(defmethod encoding-factor ((format flexi-utf-8-format))
+  (declare #.*standard-optimize-settings*)
+  ;; UTF-8 characters can be anything from one to six octets, but we
+  ;; assume that the "overhead" is only about 5 percent - this
+  ;; estimate is obviously very much dependant on the content
+  1.05d0)
+
+(defmethod encoding-factor ((format flexi-utf-16-format))
+  (declare #.*standard-optimize-settings*)
+  ;; usually one character maps to two octets, but characters with
+  ;; code points above #x10000 map to four octets - we assume that we
+  ;; usually don't see these characters but of course have to return a
+  ;; float
+  2.0d0)
+
+(defmethod encoding-factor ((format flexi-utf-32-format))
+  (declare #.*standard-optimize-settings*)
+  ;; UTF-32 always matches every character to four octets
+  4)
+
+(defmethod encoding-factor ((format flexi-crlf-mixin))
+  (declare #.*standard-optimize-settings*)
+  ;; if the sequence #\Return #\Linefeed is the line-end marker, this
+  ;; obviously makes encodings potentially longer and definitely makes
+  ;; the estimate unexact
+  (* 1.02d0 (call-next-method)))
+
+(defgeneric check-end (format start end i warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (:documentation "Helper function used below to determine if we tried
+to read past the end of the sequence.")
+  (:method (format start end i warnp)
+   (declare #.*fixnum-optimize-settings*)
+   (declare (fixnum start end i))
+   (when (and warnp (> i end))
+     (signal-encoding-warning format "These ~A octet~:P can't be ~
+decoded using ~A as the sequence is too short.  ~A octet~:P missing ~
+at then end."
+                              (- end start)
+                              (external-format-name format)
+                              (- i end))))
+  (:method ((format flexi-utf-16-format) start end i warnp)
+   (declare #.*fixnum-optimize-settings*)
+   (declare (fixnum start end i))
+   (declare (ignore i warnp))
+   ;; don't warn twice
+   (when (evenp (- end start))
+     (call-next-method))))
+
+(defgeneric compute-number-of-chars (format sequence start end warnp)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Computes the exact number of characters required to
+decode the sequence of octets in SEQUENCE from START to END using the
+external format FORMAT.  If WARNP is NIL, warnings will be muffled."))
+
+(defmethod compute-number-of-chars :around (format (list list) start end warnp)
+  (declare #.*standard-optimize-settings*)
+  (call-next-method format (coerce list 'vector) start end warnp))
+
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence warnp))
+  (- end start))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
+  ;; this method only applies to the 8-bit formats as all other
+  ;; formats with CRLF line endings have their own specialized methods
+  ;; below
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore warnp))
+  (let ((i start)
+        (length (- end start)))
+    (declare (fixnum i length))
+    (loop
+     (when (>= i end)
+       (return))
+     (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
+       (unless position
+         (return))
+       (setq i (1+ position))
+       (decf length)))
+    length))
+
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((octet (aref sequence i))
+            (length (cond ((not (logbitp 7 octet)) 1)
+                          ((= #b11000000 (logand octet #b11100000)) 2)
+                          ((= #b11100000 (logand octet #b11110000)) 3)
+                          (t 4))))
+       (declare (fixnum length) (type octet octet))
+       (incf sum)
+       (incf i length)))
+    (check-end format start end i warnp)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start)
+        (last-octet 0))
+    (declare (fixnum i sum) (type octet last-octet))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((octet (aref sequence i))
+            (length (cond ((not (logbitp 7 octet)) 1)
+                          ((= #b11000000 (logand octet #b11100000)) 2)
+                          ((= #b11100000 (logand octet #b11110000)) 3)
+                          (t 4))))
+       (declare (fixnum length) (type octet octet))
+       (unless (and (= octet +lf+) (= last-octet +cr+))
+         (incf sum))
+       (incf i length)
+       (setq last-octet octet)))
+    (check-end format start end i warnp)
+    sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))
+  (when (and warnp (oddp (- end start)))
+    (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-16 as ~:*~A is not even."
+                             (- end start))))  
+  
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (decf end 2)
+    (loop
+     (when (> i end)
+       (return))
+     (let* ((high-octet (aref sequence (1+ i)))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (type octet high-octet))
+       (incf sum)
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (decf end 2)
+    (loop
+     (when (> i end)
+       (return))
+     (let* ((high-octet (aref sequence i))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (type octet high-octet))
+       (incf sum)
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start)
+        (last-octet 0))
+    (declare (fixnum i sum) (type octet last-octet))
+    (decf end 2)
+    (loop
+     (when (> i end)
+       (return))
+     (let* ((high-octet (aref sequence (1+ i)))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (type octet high-octet))
+       (unless (and (zerop high-octet)
+                    (= (the octet (aref sequence i)) +lf+)
+                    (= last-octet +cr+))         
+         (incf sum))
+       (setq last-octet (if (zerop high-octet)
+                          (aref sequence i)
+                          0))
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start)
+        (last-octet 0))
+    (declare (fixnum i sum) (type octet last-octet))
+    (decf end 2)
+    (loop
+     (when (> i end)
+       (return))
+     (let* ((high-octet (aref sequence i))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (type octet high-octet))
+       (unless (and (zerop high-octet)
+                    (= (the octet (aref sequence (1+ i))) +lf+)
+                    (= last-octet +cr+))
+         (incf sum))
+       (setq last-octet (if (zerop high-octet)
+                          (aref sequence (1+ i))
+                          0))
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
+    sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))
+  (let ((length (- end start)))
+    (when (and warnp (plusp (mod length 4)))
+      (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-32 as ~:*~A is not a multiple-value of four."
+                               length))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence warnp))
+  (ceiling (- end start) 4))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore warnp))
+  (let ((i start)
+        (length (ceiling (- end start) 4)))
+    (decf end 8)
+    (loop
+     (when (> i end)
+       (return))
+     (cond ((loop for j of-type fixnum from i
+                  for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
+                  always (= octet (aref sequence j)))
+            (decf length)
+            (incf i 8))
+           (t (incf i 4))))
+    length))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore warnp))
+  (let ((i start)
+        (length (ceiling (- end start) 4)))
+    (decf end 8)
+    (loop
+     (when (> i end)
+       (return))
+     (cond ((loop for j of-type fixnum from i
+                  for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
+                  always (= octet (aref sequence j)))
+            (decf length)
+            (incf i 8))
+           (t (incf i 4))))
+    length))
+
+(defgeneric compute-number-of-octets (format sequence start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Computes the exact number of octets required to
+encode the sequence of characters in SEQUENCE from START to END using
+the external format FORMAT."))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))  
+  (- end start))
+
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((< char-code #x80) 1)
+                               ((< char-code #x800) 2)
+                               ((< char-code #x10000) 3)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
+                               ((< char-code #x80) 1)
+                               ((< char-code #x800) 2)
+                               ((< char-code #x10000) 3)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+                               ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+                               ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))
+  (* 4 (- end start)))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (+ (call-next-method)
+     (* (case (external-format-name format)
+          (:utf-32 4)
+          (otherwise 1))
+        (count #\Newline sequence :start start :end end :test #'char=))))
\ No newline at end of file
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0
                            
                          
                          
                            
    
                          
                        
                    
                    
                        Author: eweitz
Date: Sat May 24 23:35:21 2008
New Revision: 56
Modified:
   branches/edi/decode.lisp
   branches/edi/encode.lisp
   branches/edi/input.lisp
   branches/edi/strings.lisp
Log:
Some cosmetic fixes
Passes tests on AllegroCL as well now
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Sat May 24 23:35:21 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.19 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.20 2008/05/25 03:25:30 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -42,7 +42,7 @@
 (defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
-  (declare (ignore sequence))  
+  (declare (ignore sequence warnp))
   (- end start))
 
 (defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
@@ -51,6 +51,7 @@
   ;; below
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
+  (declare (ignore warnp))
   (let ((i start)
         (length (- end start)))
     (declare (fixnum i length))
@@ -66,8 +67,9 @@
 
 (defgeneric check-end (format start end i warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end i))
   (:method (format start end i warnp)
+   (declare #.*fixnum-optimize-settings*)
+   (declare (fixnum start end i))
    (when (and warnp (> i end))
      (signal-encoding-warning format "These ~A octet~:P can't be ~
 decoded using ~A as the sequence is too short.  ~A octet~:P missing ~
@@ -76,6 +78,9 @@
                               (external-format-name format)
                               (- i end))))
   (:method ((format flexi-utf-16-format) start end i warnp)
+   (declare #.*fixnum-optimize-settings*)
+   (declare (fixnum start end i))
+   (declare (ignore i warnp))
    ;; don't warn twice
    (when (evenp (- end start))
      (call-next-method))))
@@ -94,7 +99,7 @@
                           ((= #b11000000 (logand octet #b11100000)) 2)
                           ((= #b11100000 (logand octet #b11110000)) 3)
                           (t 4))))
-       (declare (fixnum length) (octet octet))
+       (declare (fixnum length) (type octet octet))
        (incf sum)
        (incf i length)))
     (check-end format start end i warnp)
@@ -106,7 +111,7 @@
   (let ((sum 0)
         (i start)
         (last-octet 0))
-    (declare (fixnum i sum) (octet last-octet))
+    (declare (fixnum i sum) (type octet last-octet))
     (loop
      (when (>= i end)
        (return))
@@ -115,7 +120,7 @@
                           ((= #b11000000 (logand octet #b11100000)) 2)
                           ((= #b11100000 (logand octet #b11110000)) 3)
                           (t 4))))
-       (declare (fixnum length) (octet octet))
+       (declare (fixnum length) (type octet octet))
        (unless (and (= octet +lf+) (= last-octet +cr+))
          (incf sum))
        (incf i length)
@@ -126,6 +131,7 @@
 (defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
+  (declare (ignore sequence))
   (when (and warnp (oddp (- end start)))
     (signal-encoding-warning format "~A octet~:P cannot be decoded ~
 using UTF-16 as ~:*~A is not even."
@@ -144,7 +150,7 @@
      (let* ((high-octet (aref sequence (1+ i)))
             (length (cond ((<= #xd8 high-octet #xdf) 4)
                           (t 2))))
-       (declare (fixnum length) (octet high-octet))
+       (declare (fixnum length) (type octet high-octet))
        (incf sum)
        (incf i length)))
     (check-end format start (+ end 2) i warnp)
@@ -163,7 +169,7 @@
      (let* ((high-octet (aref sequence i))
             (length (cond ((<= #xd8 high-octet #xdf) 4)
                           (t 2))))
-       (declare (fixnum length) (octet high-octet))
+       (declare (fixnum length) (type octet high-octet))
        (incf sum)
        (incf i length)))
     (check-end format start (+ end 2) i warnp)
@@ -175,7 +181,7 @@
   (let ((sum 0)
         (i start)
         (last-octet 0))
-    (declare (fixnum i sum) (octet last-octet))
+    (declare (fixnum i sum) (type octet last-octet))
     (decf end 2)
     (loop
      (when (> i end)
@@ -183,7 +189,7 @@
      (let* ((high-octet (aref sequence (1+ i)))
             (length (cond ((<= #xd8 high-octet #xdf) 4)
                           (t 2))))
-       (declare (fixnum length) (octet high-octet))
+       (declare (fixnum length) (type octet high-octet))
        (unless (and (zerop high-octet)
                     (= (the octet (aref sequence i)) +lf+)
                     (= last-octet +cr+))         
@@ -201,7 +207,7 @@
   (let ((sum 0)
         (i start)
         (last-octet 0))
-    (declare (fixnum i sum) (octet last-octet))
+    (declare (fixnum i sum) (type octet last-octet))
     (decf end 2)
     (loop
      (when (> i end)
@@ -209,7 +215,7 @@
      (let* ((high-octet (aref sequence i))
             (length (cond ((<= #xd8 high-octet #xdf) 4)
                           (t 2))))
-       (declare (fixnum length) (octet high-octet))
+       (declare (fixnum length) (type octet high-octet))
        (unless (and (zerop high-octet)
                     (= (the octet (aref sequence (1+ i))) +lf+)
                     (= last-octet +cr+))
@@ -224,6 +230,7 @@
 (defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
+  (declare (ignore sequence))
   (let ((length (- end start)))
     (when (and warnp (plusp (mod length 4)))
       (signal-encoding-warning format "~A octet~:P cannot be decoded ~
@@ -233,12 +240,13 @@
 (defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
-  (declare (ignore sequence))
+  (declare (ignore sequence warnp))
   (ceiling (- end start) 4))
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
+  (declare (ignore warnp))
   (let ((i start)
         (length (ceiling (- end start) 4)))
     (decf end 8)
@@ -256,6 +264,7 @@
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
+  (declare (ignore warnp))
   (let ((i start)
         (length (ceiling (- end start) 4)))
     (decf end 8)
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Sat May 24 23:35:21 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.16 2008/05/24 23:27:23 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.17 2008/05/25 03:25:30 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -55,7 +55,7 @@
                                ((< char-code #x800) 2)
                                ((< char-code #x10000) 3)
                                (t 4))))
-       (declare (fixnum char-length) (char-code-integer char-code))
+       (declare (fixnum char-length) (type char-code-integer char-code))
        (incf sum char-length)
        (incf i)))
     sum))
@@ -75,7 +75,7 @@
                                ((< char-code #x800) 2)
                                ((< char-code #x10000) 3)
                                (t 4))))
-       (declare (fixnum char-length) (char-code-integer char-code))
+       (declare (fixnum char-length) (type char-code-integer char-code))
        (incf sum char-length)
        (incf i)))
     sum))
@@ -92,7 +92,7 @@
      (let* ((char-code (char-code (aref sequence i)))
             (char-length (cond ((< char-code #x10000) 2)
                                (t 4))))
-       (declare (fixnum char-length) (char-code-integer char-code))
+       (declare (fixnum char-length) (type char-code-integer char-code))
        (incf sum char-length)
        (incf i)))
     sum))
@@ -110,7 +110,7 @@
             (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
                                ((< char-code #x10000) 2)
                                (t 4))))
-       (declare (fixnum char-length) (char-code-integer char-code))
+       (declare (fixnum char-length) (type char-code-integer char-code))
        (incf sum char-length)
        (incf i)))
     sum))
@@ -128,7 +128,7 @@
             (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
                                ((< char-code #x10000) 2)
                                (t 4))))
-       (declare (fixnum char-length) (char-code-integer char-code))
+       (declare (fixnum char-length) (type char-code-integer char-code))
        (incf sum char-length)
        (incf i)))
     sum))
@@ -194,7 +194,7 @@
                         (setq buffer-pos 0))
                       (write-octet (octet)
                         "Adds one octet to the buffer and flushes it if necessary."
-                        (declare (octet octet))
+                        (declare (type octet octet))
                         (when (>= buffer-pos buffer-size)
                           (flush-buffer))
                         (setf (aref buffer buffer-pos) octet)
Modified: branches/edi/input.lisp
==============================================================================
--- branches/edi/input.lisp	(original)
+++ branches/edi/input.lisp	Sat May 24 23:35:21 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.76 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.77 2008/05/25 03:34:55 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -150,8 +150,7 @@
                    (external-format flexi-stream-external-format))
       flexi-input-stream
     (let ((counter 0) octets-reversed)
-      (declare (integer position)
-               (fixnum counter))
+      (declare (fixnum counter))
       (flet ((writer (octet)
                (incf counter)
                (push octet octets-reversed)))
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp	(original)
+++ branches/edi/strings.lisp	Sat May 24 23:35:21 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.28 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.29 2008/05/25 03:34:55 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -69,7 +69,7 @@
                            (prog1
                                (nth i sequence)
                              (incf i))))))))
-    (declare (fixnum i length) (dynamic-extent reader))
+    (declare (fixnum i) (dynamic-extent reader))
     (labels ((pseudo-writer (octet)
                (declare (ignore octet))
                (decf i))
@@ -100,6 +100,6 @@
 subsequence \(of octets) of SEQUENCE from START to END in characters
 if decoded using the external format EXTERNAL-FORMAT."
   (declare #.*standard-optimize-settings*)
-  (declare (fixnum start end) (string string))
+  (declare (fixnum start end))
   (setq external-format (maybe-convert-external-format external-format))
   (compute-number-of-chars external-format sequence start end t))
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0
                            
                          
                          
                            
    
                          
                        
                    25 May '08
                    
                        Author: eweitz
Date: Sat May 24 23:14:26 2008
New Revision: 55
Modified:
   branches/edi/conditions.lisp
   branches/edi/decode.lisp
   branches/edi/doc/index.html
   branches/edi/input.lisp
   branches/edi/packages.lisp
   branches/edi/strings.lisp
   branches/edi/test/test.lisp
   branches/edi/util.lisp
Log:
Pre-compute string length
Enhanced condition hierarchy
Passes tests on LW
Modified: branches/edi/conditions.lisp
==============================================================================
--- branches/edi/conditions.lisp	(original)
+++ branches/edi/conditions.lisp	Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.8 2008/05/25 03:07:58 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -82,22 +82,32 @@
   (:documentation "Errors of this type are signalled if an erroneous
 position spec is used in conjunction with FILE-POSITION."))
 
-(define-condition external-format-error ()
+(define-condition external-format-condition (simple-condition)
   ((external-format :initarg :external-format
                     :initform nil
-                    :reader external-format-error-external-format))
+                    :reader external-format-condition-external-format))
+  (:documentation "Superclass for all conditions related to external
+formats."))
+
+(define-condition external-format-error (external-format-condition error)
+  ()
   (:documentation "Superclass for all errors related to external
 formats."))
 
-(define-condition external-format-simple-error (external-format-error simple-condition)
+(define-condition external-format-warning (external-format-condition warning)
   ()
-  (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting
-capabilities."))
+  (:documentation "Superclass for all warnings related to external
+formats."))
   
-(define-condition external-format-encoding-error (external-format-simple-error)
+(define-condition external-format-encoding-error (external-format-error)
   ()
   (:documentation "Errors of this type are signalled if there is an
 encoding problem."))
+  
+(define-condition external-format-encoding-warning (external-format-warning)
+  ()
+  (:documentation "Warnings of this type are signalled if there is an
+encoding problem."))
 
 (defun signal-encoding-error (external-format format-control &rest format-args)
   "Convenience function similar to ERROR to signal conditions of type
@@ -106,3 +116,11 @@
          :format-control format-control
          :format-arguments format-args
          :external-format external-format))
+
+(defun signal-encoding-warning (external-format format-control &rest format-args)
+  "Convenience function similar to WARN to signal conditions of type
+EXTERNAL-FORMAT-ENCODING-WARNING."
+  (warn 'external-format-encoding-warning
+        :format-control format-control
+        :format-arguments format-args
+        :external-format external-format))
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.18 2008/05/25 01:42:50 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.19 2008/05/25 03:07:59 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -29,23 +29,26 @@
 
 (in-package :flexi-streams)
 
-(defgeneric compute-number-of-chars (format sequence start end)
+(defgeneric compute-number-of-chars (format sequence start end warnp)
   (declare #.*standard-optimize-settings*)
   (:documentation "Computes the exact number of characters required to
 decode the sequence of octets in SEQUENCE from START to END using the
-external format FORMAT."))
+external format FORMAT.  If WARNP is NIL, warnings will be muffled."))
 
-(defmethod compute-number-of-chars :around (format (list list) start end)
+(defmethod compute-number-of-chars :around (format (list list) start end warnp)
   (declare #.*standard-optimize-settings*)
-  (call-next-method format (coerce list 'vector) start end))
+  (call-next-method format (coerce list 'vector) start end warnp))
 
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (declare (ignore sequence))  
   (- end start))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-8-bit-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
+  ;; this method only applies to the 8-bit formats as all other
+  ;; formats with CRLF line endings have their own specialized methods
+  ;; below
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((i start)
@@ -61,18 +64,23 @@
        (decf length)))
     length))
 
-(defun check-end (format start end i)  
+(defgeneric check-end (format start end i warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end i))
-  (unless (= i end)
-    (signal-encoding-error format "These ~A octet~:P can't be ~
-decoded using ~A as the sequence is too short.  ~A octet~:P ~
-missing at then end."
-                           (- end start)
-                           (external-format-name format)
-                           (- i end))))
+  (:method (format start end i warnp)
+   (when (and warnp (> i end))
+     (signal-encoding-warning format "These ~A octet~:P can't be ~
+decoded using ~A as the sequence is too short.  ~A octet~:P missing ~
+at then end."
+                              (- end start)
+                              (external-format-name format)
+                              (- i end))))
+  (:method ((format flexi-utf-16-format) start end i warnp)
+   ;; don't warn twice
+   (when (evenp (- end start))
+     (call-next-method))))
 
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((sum 0)
@@ -89,10 +97,10 @@
        (declare (fixnum length) (octet octet))
        (incf sum)
        (incf i length)))
-    (check-end format start end i)
+    (check-end format start end i warnp)
     sum))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((sum 0)
@@ -112,25 +120,26 @@
          (incf sum))
        (incf i length)
        (setq last-octet octet)))
-    (check-end format start end i)
+    (check-end format start end i warnp)
     sum))
 
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end)
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
-  (unless (evenp (- end start))
-    (signal-encoding-error format "~A octet~:P cannot be decoded using ~
-UTF-16 as ~:*~A is not even."
-                           (- end start))))  
+  (when (and warnp (oddp (- end start)))
+    (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-16 as ~:*~A is not even."
+                             (- end start))))  
   
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
+    (decf end 2)
     (loop
-     (when (>= i end)
+     (when (> i end)
        (return))
      (let* ((high-octet (aref sequence (1+ i)))
             (length (cond ((<= #xd8 high-octet #xdf) 4)
@@ -138,17 +147,18 @@
        (declare (fixnum length) (octet high-octet))
        (incf sum)
        (incf i length)))
-    (check-end format start end i)
+    (check-end format start (+ end 2) i warnp)
     sum))
 
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
+    (decf end 2)
     (loop
-     (when (>= i end)
+     (when (> i end)
        (return))
      (let* ((high-octet (aref sequence i))
             (length (cond ((<= #xd8 high-octet #xdf) 4)
@@ -156,18 +166,19 @@
        (declare (fixnum length) (octet high-octet))
        (incf sum)
        (incf i length)))
-    (check-end format start end i)
+    (check-end format start (+ end 2) i warnp)
     sum))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((sum 0)
         (i start)
         (last-octet 0))
     (declare (fixnum i sum) (octet last-octet))
+    (decf end 2)
     (loop
-     (when (>= i end)
+     (when (> i end)
        (return))
      (let* ((high-octet (aref sequence (1+ i)))
             (length (cond ((<= #xd8 high-octet #xdf) 4)
@@ -175,24 +186,25 @@
        (declare (fixnum length) (octet high-octet))
        (unless (and (zerop high-octet)
                     (= (the octet (aref sequence i)) +lf+)
-                    (= last-octet +cr+))
+                    (= last-octet +cr+))         
          (incf sum))
-       (incf i length)
        (setq last-octet (if (zerop high-octet)
                           (aref sequence i)
-                          0))))
-    (check-end format start end i)
+                          0))
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
     sum))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((sum 0)
         (i start)
         (last-octet 0))
     (declare (fixnum i sum) (octet last-octet))
+    (decf end 2)
     (loop
-     (when (>= i end)
+     (when (> i end)
        (return))
      (let* ((high-octet (aref sequence i))
             (length (cond ((<= #xd8 high-octet #xdf) 4)
@@ -202,32 +214,33 @@
                     (= (the octet (aref sequence (1+ i))) +lf+)
                     (= last-octet +cr+))
          (incf sum))
-       (incf i length)
        (setq last-octet (if (zerop high-octet)
                           (aref sequence (1+ i))
-                          0))))
-    (check-end format start end i)
+                          0))
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
     sum))
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end)
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((length (- end start)))
-    (unless (zerop (mod length 4))
-      (signal-encoding-error format "~A octet~:P cannot be decoded using ~
-UTF-32 as ~:*~A is not a multiple-value of four."
-                             length))))  
+    (when (and warnp (plusp (mod length 4)))
+      (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-32 as ~:*~A is not a multiple-value of four."
+                               length))))
 
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (declare (ignore sequence))
-  (/ (- end start) 4))
+  (ceiling (- end start) 4))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((i start)
-        (length (/ (- end start) 4)))
+        (length (ceiling (- end start) 4)))
     (decf end 8)
     (loop
      (when (> i end)
@@ -240,11 +253,11 @@
            (t (incf i 4))))
     length))
 
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
   (let ((i start)
-        (length (/ (- end start) 4)))
+        (length (ceiling (- end start) 4)))
     (decf end 8)
     (loop
      (when (> i end)
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html	(original)
+++ branches/edi/doc/index.html	Sat May 24 23:14:26 2008
@@ -69,7 +69,12 @@
       <li><a href="#external-format-equal"><code>external-format-equal</code></a>
       <li><a href="#*default-eol-style*"><code>*default-eol-style*</code></a>
       <li><a href="#*default-little-endian*"><code>*default-little-endian*</code></a>
+      <li><a href="#external-format-condition"><code>external-format-condition</code></a>
+      <li><a href="#external-format-condition-external-format"><code>external-format-condition-external-format</code></a>
+      <li><a href="#external-format-error"><code>external-format-error</code></a>
+      <li><a href="#external-format-warning"><code>external-format-warning</code></a>
       <li><a href="#external-format-encoding-error"><code>external-format-encoding-error</code></a>
+      <li><a href="#*substitution-char*"><code>*substitution-char*</code></a>
       </ol>
     <li><a href="#flexi-streams">Flexi streams</a>
       <ol>
@@ -86,7 +91,6 @@
       <li><a href="#flexi-stream-stream"><code>flexi-stream-stream</code></a>
       <li><a href="#unread-byte"><code>unread-byte</code></a>
       <li><a href="#peek-byte"><code>peek-byte</code></a>
-      <li><a href="#*substitution-char*"><code>*substitution-char*</code></a>
       <li><a href="#octet"><code>octet</code></a>
       <li><a href="#flexi-stream-error"><code>flexi-stream-error</code></a>
       <li><a href="#flexi-stream-out-of-sync-error"><code>flexi-stream-out-of-sync-error</code></a>
@@ -526,29 +530,98 @@
 </blockquote>
 
 <p><br>[Condition]
-<br><a class=none name="external-format-error"><b>external-format-error</b></a>
+<br><a class=none name="external-format-condition"><b>external-format-condition</b></a>
 
 <blockquote><br>
-All errors related to <a href="#external-formats">external formats</a> are of this type.
-There's a slot for the external format which can be accessed with <a href="#external-format-error-external-format"><code>EXTERNAL-FORMAT-ERROR-EXTERNAL-FORMAT</code></a>.
+All conditions related to <a href="#external-formats">external formats</a> are of this type.
+There's a slot for the external format which can be accessed with <a href="#external-format-condition-external-format"><code>EXTERNAL-FORMAT-CONDITION-EXTERNAL-FORMAT</code></a>.
 </blockquote>
 
 <p><br>[Reader]
-<br><a class=none name="external-format-error-external-format"><b>external-format-error-external-format</b> <i>condition</i> => <i>external-format</i></a>
+<br><a class=none name="external-format-condition-external-format"><b>external-format-condition-external-format</b> <i>condition</i> => <i>external-format</i></a>
 
 <blockquote><br> If <code><i>condition</i></code> is of
-type <a href="#external-format-error"><code>EXTERNAL-FORMAT-ERROR</code></a>,
+type <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>,
 this function will return the associated external format.  Note that
-there are errors which happen during the creation of external formats
-where this method returns <code>NIL</code>.
+there are situation which happen during the creation of external
+formats where this method returns <code>NIL</code>.
+</blockquote>
+
+<p><br>[Condition]
+<br><a class=none name="external-format-warning"><b>external-format-warning</b></a>
+
+<blockquote><br>
+All warnings related to <a href="#external-formats">external formats</a> are of this type.
+This is a subtype of <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>.
+</blockquote>
+
+<p><br>[Condition]
+<br><a class=none name="external-format-error"><b>external-format-error</b></a>
+
+<blockquote><br>
+All errors related to <a href="#external-formats">external formats</a> are of this type.
+This is a subtype of <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>.
 </blockquote>
 
 <p><br>[Condition]
 <br><a class=none name="external-format-encoding-error"><b>external-format-encoding-error</b></a>
 
 <blockquote><br>
-All errors related to encoding problems with <a href="#flexi-streams">flexi streams</a> are of this type.  (This includes situation where an end of file is encountered in the middle of a multi-octet character.)  When this condition is signalled during reading, <a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm"><code>USE-VALUE</code>
-restart</a> is provided. See also <a href="#*substitution-char*"><code>*SUBSTITUTION-CHAR*</code></a> and example for it. <a href="#external-format-encoding-error"><code>EXTERNAL-FORMAT-ENCODING-ERROR</code></a> is a subtype of <a href="#external-format-error"><code>EXTERNAL-FORMAT-ERROR</code></a>.
+All errors related to encoding problems with <a href="#external-formats">external formats</a> are of this type.  (This includes situation where an end of file is encountered in the middle of a multi-octet character.)  When this condition is signalled during reading, <a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm"><code>USE-VALUE</code>
+restart</a> is provided. See also <a href="#*substitution-char*"><code>*SUBSTITUTION-CHAR*</code></a> and the example for it. <a href="#external-format-encoding-error"><code>EXTERNAL-FORMAT-ENCODING-ERROR</code></a> is a subtype of <a href="#external-format-error"><code>EXTERNAL-FORMAT-ERROR</code></a>.
+</blockquote>
+
+<p><br>[Special variable]
+<br><a class=none name="*substitution-char*"><b>*substitution-char*</b></a>
+
+<blockquote><br>
+If this value is not NIL, it should be a character which is used
+(as if by a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm"><code>USE-VALUE</code> restart</a>) whenever during reading an error of
+type <a href="#external-format-encoding-error"><code>EXTERNAL-FORMAT-ENCODING-ERROR</code></a> would have been signalled otherwise.
+
+<pre>
+CL-USER 1 > (defun foo ()
+              <font color=orange>;; not a valid UTF-8 sequence</font>
+              (<a href="#with-input-from-sequence" class=noborder>with-input-from-sequence</a> (in '(#xe4 #xf6 #xfc))
+                (setq in (<a href="#make-flexi-stream" class=noborder>make-flexi-stream</a> in :external-format :utf8))
+                (read-line in)))
+FOO
+
+CL-USER 2 > (foo)
+
+Error: Unexpected value #xF6 in UTF-8 sequence.
+  1 (continue) Specify a character to be used instead.
+  2 (abort) Return to level 0.
+  3 Return to top loop level 0.
+
+Type :b for backtrace, :c <option number> to proceed,  or :? for other options
+
+CL-USER 3 : 1 > :c
+Type a character: x
+
+Error: End of file while in UTF-8 sequence.
+  1 (continue) Specify a character to be used instead.
+  2 (abort) Return to level 0.
+  3 Return to top loop level 0.
+
+Type :b for backtrace, :c <option number> to proceed,  or :? for other options
+
+CL-USER 4 : 1 > :c
+Type a character: y
+"xy"
+T
+
+CL-USER 5 > (<a href="http://www.lispworks.com/documentation/HyperSpec/Body/m_handle.htm" class=noborder>handler-bind</a> ((<a href="#external-format-encoding-error" class=noborder>external-format-encoding-error</a> (lambda (condition)
+                                                          (<a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm" class=noborder>use-value</a> #\-))))
+              (foo))
+"--"
+T
+
+CL-USER 6 > (let ((<a href="#*SUBSTITUTION-CHAR*" class=noborder>*substitution-char*</a> #\?))
+              (foo))
+"??"
+T
+</pre>
 </blockquote>
 
 <h4><a name="flexi-streams" class=none>Flexi streams</a></h4>
@@ -739,59 +812,6 @@
 Note that the parameters aren't in the same order as with <a href="http://www.lispworks.com/documentation/HyperSpec/Body/f_peek_c.htm"><code>PEEK-CHAR</code></a> because it doesn't make much sense to make <code><i>stream</i></code> an optional argument.
 </blockquote>
 
-<p><br>[Special variable]
-<br><a class=none name="*substitution-char*"><b>*substitution-char*</b></a>
-
-<blockquote><br>
-If this value is not NIL, it should be a character which is used
-(as if by a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm"><code>USE-VALUE</code> restart</a>) whenever during reading an error of
-type <a href="#external-format-encoding-error"><code>EXTERNAL-FORMAT-ENCODING-ERROR</code></a> would have been signalled otherwise.
-
-<pre>
-CL-USER 1 > (defun foo ()
-              <font color=orange>;; not a valid UTF-8 sequence</font>
-              (<a href="#with-input-from-sequence" class=noborder>with-input-from-sequence</a> (in '(#xe4 #xf6 #xfc))
-                (setq in (<a href="#make-flexi-stream" class=noborder>make-flexi-stream</a> in :external-format :utf8))
-                (read-line in)))
-FOO
-
-CL-USER 2 > (foo)
-
-Error: Unexpected value #xF6 in UTF-8 sequence.
-  1 (continue) Specify a character to be used instead.
-  2 (abort) Return to level 0.
-  3 Return to top loop level 0.
-
-Type :b for backtrace, :c <option number> to proceed,  or :? for other options
-
-CL-USER 3 : 1 > :c
-Type a character: x
-
-Error: End of file while in UTF-8 sequence.
-  1 (continue) Specify a character to be used instead.
-  2 (abort) Return to level 0.
-  3 Return to top loop level 0.
-
-Type :b for backtrace, :c <option number> to proceed,  or :? for other options
-
-CL-USER 4 : 1 > :c
-Type a character: y
-"xy"
-T
-
-CL-USER 5 > (<a href="http://www.lispworks.com/documentation/HyperSpec/Body/m_handle.htm" class=noborder>handler-bind</a> ((<a href="#external-format-encoding-error" class=noborder>external-format-encoding-error</a> (lambda (condition)
-                                                          (<a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm" class=noborder>use-value</a> #\-))))
-              (foo))
-"--"
-T
-
-CL-USER 6 > (let ((<a href="#*SUBSTITUTION-CHAR*" class=noborder>*substitution-char*</a> #\?))
-              (foo))
-"??"
-T
-</pre>
-</blockquote>
-
 <p><br>[Type]
 <br><a class=none name="octet"><b>octet</b></a>
 
@@ -997,7 +1017,7 @@
 
 <blockquote><br> Converts the Lisp
 sequence <code><i>sequence</i></code> of <a href="#octet">octets</a>
-from <code><i>start</i></code> to <code><i>end</i></code> to string
+from <code><i>start</i></code> to <code><i>end</i></code> to a string
 using the <a href="#external-formats">external format</a> designated
 by <code><i>external-format</i></code>.  The defaults for
 <code><i>start</i></code> and <code><i>end</i></code>
@@ -1075,7 +1095,7 @@
 his work on making FLEXI-STREAMS faster.
 
 <p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.112 2008/05/25 01:41:25 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.114 2008/05/25 03:08:01 edi Exp $
 <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
 
 </body>
Modified: branches/edi/input.lisp
==============================================================================
--- branches/edi/input.lisp	(original)
+++ branches/edi/input.lisp	Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.75 2008/05/23 14:43:09 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.76 2008/05/25 03:07:59 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -354,10 +354,10 @@
   (with-accessors ((last-char-code flexi-stream-last-char-code))
       stream
     (unless last-char-code
-      (error 'flexi-stream-simple-error
+      (error 'flexi-stream-error
              :format-control "No character to unread from this stream \(or external format has changed or last reading operation was binary)."))
     (unless (= (char-code char) last-char-code)
-      (error 'flexi-stream-simple-error
+      (error 'flexi-stream-error
              :format-control "Last character read (~S) was different from ~S."
              :format-arguments (list (code-char last-char-code) char)))
     (unread-char% char stream)
@@ -374,10 +374,10 @@
                    (position flexi-stream-position))
       flexi-input-stream
     (unless last-octet
-      (error 'flexi-stream-simple-error
+      (error 'flexi-stream-error
              :format-control "No byte to unread from this stream \(or last reading operation read a character)."))
     (unless (= byte last-octet)
-      (error 'flexi-stream-simple-error
+      (error 'flexi-stream-error
              :format-control "Last byte read was different from #x~X."
              :format-arguments (list byte)))
     (setq last-octet nil)
Modified: branches/edi/packages.lisp
==============================================================================
--- branches/edi/packages.lisp	(original)
+++ branches/edi/packages.lisp	Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.36 2008/05/25 01:40:54 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.37 2008/05/25 03:07:59 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -41,14 +41,17 @@
            :*default-little-endian*
            :*substitution-char*
            :char-length
+           :external-format-condition
+           :external-format-condition-external-format
            :external-format-eol-style
            :external-format-error
-           :external-format-error-external-format
            :external-format-encoding-error
+           :external-format-encoding-warning
            :external-format-equal
            :external-format-id
            :external-format-little-endian
            :external-format-name
+           :external-format-warning
            :flexi-input-stream
            :flexi-output-stream
            :flexi-io-stream
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp	(original)
+++ branches/edi/strings.lisp	Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.26 2008/05/25 01:41:32 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.28 2008/05/25 03:07:59 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -45,13 +45,11 @@
                                   (external-format :latin1)
                                   (start 0) (end (length sequence)))
   "Converts the Lisp sequence SEQUENCE of octets from START to END to
-string using the external format designated by EXTERNAL-FORMAT."
+a string using the external format designated by EXTERNAL-FORMAT."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
   (setq external-format (maybe-convert-external-format external-format))
-  (let* ((factor (encoding-factor external-format))
-         (length (- end start))
-         (i start)
+  (let* ((i start)
          (reader (etypecase sequence
                    ((array octet *)
                     (lambda ()
@@ -82,37 +80,12 @@
         (flet ((next-char ()
                  (code-char (octets-to-char-code external-format reader))))
           (declare (inline next-char))
-          (etypecase factor
-            (integer
-             (let* ((string-length (ceiling length factor))
-                    (string (make-array string-length
-                                        :element-type 'char*)))
-               (declare (fixnum string-length))
-               (loop for j of-type fixnum from 0 below string-length
-                     do (setf (schar string j) (next-char))
-                     finally (return string))))
-            (double-float
-             ;; this is a bit clunky but hopefully a bit more efficient than
-             ;; using VECTOR-PUSH-EXTEND
-             (let* ((string-length (ceiling length (the double-float factor)))
-                    (string (make-array string-length
-                                        :element-type 'char*
-                                        :fill-pointer t
-                                        :adjustable t))
-                    (j 0))
-               (declare (fixnum j string-length)
-                        (double-float factor))
-               (loop
-                (when (>= i end)
-                  (return))
-                (when (>= j string-length)
-                  (setq factor (/ factor 2.0d0))
-                  (incf string-length (the fixnum (ceiling (- end i) factor)))
-                  (adjust-array string string-length :fill-pointer t))
-                (setf (char string j) (next-char))
-                (incf j))
-               (setf (fill-pointer string) j)
-               string))))))))
+          (let* ((string-length (compute-number-of-chars external-format sequence start end nil))
+                 (string (make-array string-length :element-type 'char*)))
+            (declare (fixnum string-length))
+            (loop for j of-type fixnum from 0 below string-length
+                  do (setf (schar string j) (next-char))
+                  finally (return string))))))))
 
 (defun octet-length (string &key (external-format :latin1) (start 0) (end (length string)))
   "Returns the length of the substring of STRING from START to END in
@@ -129,4 +102,4 @@
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end) (string string))
   (setq external-format (maybe-convert-external-format external-format))
-  (compute-number-of-chars external-format sequence start end))
+  (compute-number-of-chars external-format sequence start end t))
Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp	(original)
+++ branches/edi/test/test.lisp	Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.32 2008/05/21 17:51:42 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.33 2008/05/25 03:08:02 edi Exp $
 
 ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -264,8 +264,8 @@
                       `(handler-case
                            (unless ,expression
                              (fail "Expression ~S failed.~%" ',expression))
-                         (condition (c)
-                           (fail "Expression ~S failed signaling condition of type ~A: ~A.~%" 
+                         (error (c)
+                           (fail "Expression ~S failed signalling error of type ~A: ~A.~%" 
                                  ',expression (type-of c) c)))))
            (format *error-output* "Test ~S~%" ,test-description)
            ,@body
@@ -473,10 +473,10 @@
       (check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))
       ;; not a valid UTF-8 sequence
       (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))
-      (check (string= "??" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))
+      (check (string= "?" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))
       ;; UTF-8 can't start neither with #b11111110 nor with #b11111111
       (check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8)))
-      (check (string= "??" (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
+      (check (string= "?" (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
     (let ((*substitution-char* nil))
       ;; :ASCII doesn't have characters with char codes > 127
       (check (string= "abc" (using-values (#\b #\c)
@@ -490,13 +490,13 @@
                               (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))))
       ;; not a valid UTF-8 sequence
       (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))))
-      (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))))
+      (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))))
       ;; UTF-8 can't start neither with #b11111110 nor with #b11111111
       (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8))))
-      (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
+      (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
       ;; only one byte
       (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le))))
-      (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16le))))
+      (check (string= "" (read-flexi-line* #(#x01) :utf-16le)))
       ;; two bytes, but value of resulting word suggests that another word follows
       (check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le))))
       (check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le))))
@@ -507,7 +507,7 @@
       (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be))))
       (check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be))))
       (check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be))))
-      (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16be))))
+      (check (string= "" (read-flexi-line* #(#x01) :utf-16be)))
       (check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be))))
       (check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be))))
       ;; the only case when error is signalled for UTF-32 is at end of file
Modified: branches/edi/util.lisp
==============================================================================
--- branches/edi/util.lisp	(original)
+++ branches/edi/util.lisp	Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.22 2008/05/25 01:40:54 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.23 2008/05/25 03:07:59 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -115,7 +115,7 @@
     (unless (find real-name +name-map+
                   :test #'eq
                   :key #'cdr)
-      (error 'external-format-simple-error
+      (error 'external-format-error
              :format-control "~S is not known to be a name for an external format."
              :format-arguments (list name)))
     real-name))
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0
                            
                          
                          
                            
    
                          
                        
                    
                    
                        Author: eweitz
Date: Sat May 24 21:43:56 2008
New Revision: 54
Modified:
   branches/edi/decode.lisp
   branches/edi/doc/index.html
   branches/edi/packages.lisp
   branches/edi/specials.lisp
   branches/edi/strings.lisp
   branches/edi/util.lisp
Log:
Compute decoding length
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.16 2008/05/20 23:01:50 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.18 2008/05/25 01:42:50 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -29,6 +29,234 @@
 
 (in-package :flexi-streams)
 
+(defgeneric compute-number-of-chars (format sequence start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Computes the exact number of characters required to
+decode the sequence of octets in SEQUENCE from START to END using the
+external format FORMAT."))
+
+(defmethod compute-number-of-chars :around (format (list list) start end)
+  (declare #.*standard-optimize-settings*)
+  (call-next-method format (coerce list 'vector) start end))
+
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))  
+  (- end start))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-8-bit-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((i start)
+        (length (- end start)))
+    (declare (fixnum i length))
+    (loop
+     (when (>= i end)
+       (return))
+     (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
+       (unless position
+         (return))
+       (setq i (1+ position))
+       (decf length)))
+    length))
+
+(defun check-end (format start end i)  
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end i))
+  (unless (= i end)
+    (signal-encoding-error format "These ~A octet~:P can't be ~
+decoded using ~A as the sequence is too short.  ~A octet~:P ~
+missing at then end."
+                           (- end start)
+                           (external-format-name format)
+                           (- i end))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((octet (aref sequence i))
+            (length (cond ((not (logbitp 7 octet)) 1)
+                          ((= #b11000000 (logand octet #b11100000)) 2)
+                          ((= #b11100000 (logand octet #b11110000)) 3)
+                          (t 4))))
+       (declare (fixnum length) (octet octet))
+       (incf sum)
+       (incf i length)))
+    (check-end format start end i)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start)
+        (last-octet 0))
+    (declare (fixnum i sum) (octet last-octet))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((octet (aref sequence i))
+            (length (cond ((not (logbitp 7 octet)) 1)
+                          ((= #b11000000 (logand octet #b11100000)) 2)
+                          ((= #b11100000 (logand octet #b11110000)) 3)
+                          (t 4))))
+       (declare (fixnum length) (octet octet))
+       (unless (and (= octet +lf+) (= last-octet +cr+))
+         (incf sum))
+       (incf i length)
+       (setq last-octet octet)))
+    (check-end format start end i)
+    sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (unless (evenp (- end start))
+    (signal-encoding-error format "~A octet~:P cannot be decoded using ~
+UTF-16 as ~:*~A is not even."
+                           (- end start))))  
+  
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((high-octet (aref sequence (1+ i)))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (octet high-octet))
+       (incf sum)
+       (incf i length)))
+    (check-end format start end i)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((high-octet (aref sequence i))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (octet high-octet))
+       (incf sum)
+       (incf i length)))
+    (check-end format start end i)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start)
+        (last-octet 0))
+    (declare (fixnum i sum) (octet last-octet))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((high-octet (aref sequence (1+ i)))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (octet high-octet))
+       (unless (and (zerop high-octet)
+                    (= (the octet (aref sequence i)) +lf+)
+                    (= last-octet +cr+))
+         (incf sum))
+       (incf i length)
+       (setq last-octet (if (zerop high-octet)
+                          (aref sequence i)
+                          0))))
+    (check-end format start end i)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start)
+        (last-octet 0))
+    (declare (fixnum i sum) (octet last-octet))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((high-octet (aref sequence i))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (octet high-octet))
+       (unless (and (zerop high-octet)
+                    (= (the octet (aref sequence (1+ i))) +lf+)
+                    (= last-octet +cr+))
+         (incf sum))
+       (incf i length)
+       (setq last-octet (if (zerop high-octet)
+                          (aref sequence (1+ i))
+                          0))))
+    (check-end format start end i)
+    sum))
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((length (- end start)))
+    (unless (zerop (mod length 4))
+      (signal-encoding-error format "~A octet~:P cannot be decoded using ~
+UTF-32 as ~:*~A is not a multiple-value of four."
+                             length))))  
+
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))
+  (/ (- end start) 4))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((i start)
+        (length (/ (- end start) 4)))
+    (decf end 8)
+    (loop
+     (when (> i end)
+       (return))
+     (cond ((loop for j of-type fixnum from i
+                  for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
+                  always (= octet (aref sequence j)))
+            (decf length)
+            (incf i 8))
+           (t (incf i 4))))
+    length))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((i start)
+        (length (/ (- end start) 4)))
+    (decf end 8)
+    (loop
+     (when (> i end)
+       (return))
+     (cond ((loop for j of-type fixnum from i
+                  for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
+                  always (= octet (aref sequence j)))
+            (decf length)
+            (incf i 8))
+           (t (incf i 4))))
+    length))
+
 (defun recover-from-encoding-error (external-format format-control &rest format-args)
   "Helper function used by OCTETS-TO-CHAR-CODE below to deal with
 encoding errors.  Checks if *SUBSTITUTION-CHAR* is not NIL and returns
@@ -242,7 +470,7 @@
   (declare (ignore reader))
   (let ((char-code (call-next-method)))
     (case char-code
-      (#.(char-code #\Return) #.(char-code #\Newline))
+      (#.+cr+ #.(char-code #\Newline))
       (otherwise char-code))))
 
 (defmethod octets-to-char-code ((format flexi-crlf-mixin) reader)
@@ -251,13 +479,13 @@
   (declare (ignore reader))
   (let ((char-code (call-next-method)))
     (case char-code
-      (#.(char-code #\Return)
+      (#.+cr+
        (let ((next-char-code (call-next-method)))
          (case next-char-code
-           (#.(char-code #\Linefeed) #.(char-code #\Newline))
+           (#.+lf+ #.(char-code #\Newline))
            ;; we saw a CR but no LF afterwards, but then the data
            ;; ended, so we just return #\Return
-           ((nil) #.(char-code #\Return))
+           ((nil) +cr+)
            ;; if the character we peeked at wasn't a
            ;; linefeed character we unread its constituents
            (otherwise (funcall *current-unreader* (code-char next-char-code))
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html	(original)
+++ branches/edi/doc/index.html	Sat May 24 21:43:56 2008
@@ -116,6 +116,7 @@
       <li><a href="#string-to-octets"><code>string-to-octets</code></a>
       <li><a href="#octets-to-string"><code>octets-to-string</code></a>
       <li><a href="#octet-length"><code>octet-length</code></a>
+      <li><a href="#char-length"><code>char-length</code></a>
       </ol>
   </ol>
   <li><a href="#position">File positions</a>
@@ -1005,16 +1006,30 @@
 </blockquote>
 
 <p><br>[Function]
-<br><a class=none name="octet-length"><b>octet-length</b> <i>string <tt>&key</tt> external-format start end</i> => <i>length-or-nil</i></a>
+<br><a class=none name="octet-length"><b>octet-length</b> <i>string <tt>&key</tt> external-format start end</i> => <i>length</i></a>
 
 <blockquote><br>
 
 Returns the length of the substring of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
 <a href="#octet">octets</a> if encoded using
 the <a href="#external-formats">external format</a> designated
-by <code><i>external-format</i></code>.  Might return <code>NIL</code>
-if there's no efficient way to compute the length without iterating
-through the whole string.
+by <code><i>external-format</i></code>.
+The defaults for
+<code><i>start</i></code> and <code><i>end</i></code>
+are <code>0</code> and the length of the string.  The default
+for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+</blockquote>
+
+<p><br>[Function]
+<br><a class=none name="char-length"><b>char-length</b> <i>sequence <tt>&key</tt> external-format start end</i> => <i>length</i></a>
+
+<blockquote><br>
+
+Kind of the inverse of <a href="#octet-length"><code>OCTET-LENGTH</code></a>.
+Returns the length of the subsequence (of <a href="#octet">octets</a>) of <code><i>sequence</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
+characters if decoded using
+the <a href="#external-formats">external format</a> designated
+by <code><i>external-format</i></code>.
 The defaults for
 <code><i>start</i></code> and <code><i>end</i></code>
 are <code>0</code> and the length of the sequence.  The default
@@ -1060,7 +1075,7 @@
 his work on making FLEXI-STREAMS faster.
 
 <p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.111 2008/05/23 14:56:47 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.112 2008/05/25 01:41:25 edi Exp $
 <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
 
 </body>
Modified: branches/edi/packages.lisp
==============================================================================
--- branches/edi/packages.lisp	(original)
+++ branches/edi/packages.lisp	Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.35 2008/05/21 01:43:43 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.36 2008/05/25 01:40:54 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -40,6 +40,7 @@
   (:export :*default-eol-style*
            :*default-little-endian*
            :*substitution-char*
+           :char-length
            :external-format-eol-style
            :external-format-error
            :external-format-error-external-format
Modified: branches/edi/specials.lisp
==============================================================================
--- branches/edi/specials.lisp	(original)
+++ branches/edi/specials.lisp	Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.32 2008/05/20 23:01:51 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.33 2008/05/25 01:40:54 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -49,6 +49,10 @@
   "Like *STANDARD-OPTIMIZE-SETTINGS*, but \(on LispWorks) with all
 arithmetic being fixnum arithmetic.")
 
+(defconstant +lf+ (char-code #\Linefeed))
+
+(defconstant +cr+ (char-code #\Return))
+
 (defvar *current-unreader* nil
   "A unary function which might be called to `unread' a character
 \(i.e. the sequence of octets it represents).
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp	(original)
+++ branches/edi/strings.lisp	Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.24 2008/05/24 23:15:25 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.26 2008/05/25 01:41:32 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -116,12 +116,17 @@
 
 (defun octet-length (string &key (external-format :latin1) (start 0) (end (length string)))
   "Returns the length of the substring of STRING from START to END in
-octets if encoded using the external format EXTERNAL-FORMAT.  Might
-return NIL if there's no efficient way to compute the length without
-iterating through the whole string."
+octets if encoded using the external format EXTERNAL-FORMAT."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end) (string string))
   (setq external-format (maybe-convert-external-format external-format))
-  (let ((factor (encoding-factor external-format)))
-    (typecase factor
-      (fixnum (* factor (- end start))))))
+  (compute-number-of-octets external-format string start end))
+
+(defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence)))
+  "Kind of the inverse of OCTET-LENGTH.  Returns the length of the
+subsequence \(of octets) of SEQUENCE from START to END in characters
+if decoded using the external format EXTERNAL-FORMAT."
+  (declare #.*standard-optimize-settings*)
+  (declare (fixnum start end) (string string))
+  (setq external-format (maybe-convert-external-format external-format))
+  (compute-number-of-chars external-format sequence start end))
Modified: branches/edi/util.lisp
==============================================================================
--- branches/edi/util.lisp	(original)
+++ branches/edi/util.lisp	Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.22 2008/05/25 01:40:54 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -115,7 +115,7 @@
     (unless (find real-name +name-map+
                   :test #'eq
                   :key #'cdr)
-      (error 'external-format-error
+      (error 'external-format-simple-error
              :format-control "~S is not known to be a name for an external format."
              :format-arguments (list name)))
     real-name))
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0
                            
                          
                          
                            
    
                          
                        
                    
                    
                        Author: eweitz
Date: Sat May 24 19:34:51 2008
New Revision: 53
Added:
   branches/edi/conditions.lisp
Modified:
   branches/edi/encode.lisp
   branches/edi/output.lisp
   branches/edi/strings.lisp
   branches/edi/test/test.lisp
Log:
Faster encoding - passes all tests on LW
Added: branches/edi/conditions.lisp
==============================================================================
--- (empty file)
+++ branches/edi/conditions.lisp	Sat May 24 19:34:51 2008
@@ -0,0 +1,108 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $
+
+;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :flexi-streams)
+
+(define-condition flexi-stream-error (stream-error)
+  ()
+  (:documentation "Superclass for all errors related to flexi
+streams."))
+
+(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition)
+  ()
+  (:documentation "Like FLEXI-STREAM-ERROR but with formatting
+capabilities."))
+
+(define-condition flexi-stream-element-type-error (flexi-stream-error)
+  ((element-type :initarg :element-type
+                 :reader flexi-stream-element-type-error-element-type))
+  (:report (lambda (condition stream)
+             (format stream "Element type ~S not allowed."
+                     (flexi-stream-element-type-error-element-type condition))))
+  (:documentation "Errors of this type are signalled if the flexi
+stream has a wrong element type."))
+
+(define-condition flexi-stream-out-of-sync-error (flexi-stream-error)
+  ()
+  (:report (lambda (condition stream)
+             (declare (ignore condition))
+             (format stream "Stream out of sync from previous
+lookahead, couldn't rewind.")))
+  (:documentation "This can happen if you're trying to write to an IO
+stream which had prior to that `looked ahead' while reading and now
+can't `rewind' to the octet where you /should/ be."))
+
+(define-condition in-memory-stream-error (stream-error)
+  ()
+  (:documentation "Superclass for all errors related to
+IN-MEMORY streams."))
+
+(define-condition in-memory-stream-simple-error (in-memory-stream-error simple-condition)
+  ()
+  (:documentation "Like IN-MEMORY-STREAM-ERROR but with formatting
+capabilities."))
+
+(define-condition in-memory-stream-closed-error (in-memory-stream-error)
+  ()
+  (:report (lambda (condition stream)
+             (format stream "~S is closed."
+                     (stream-error-stream condition))))
+  (:documentation "An error that is signalled when someone is trying
+to read from or write to a closed IN-MEMORY stream."))
+
+(define-condition in-memory-stream-position-spec-error (in-memory-stream-simple-error)
+  ((position-spec :initarg :position-spec
+                  :reader in-memory-stream-position-spec-error-position-spec))
+  (:documentation "Errors of this type are signalled if an erroneous
+position spec is used in conjunction with FILE-POSITION."))
+
+(define-condition external-format-error ()
+  ((external-format :initarg :external-format
+                    :initform nil
+                    :reader external-format-error-external-format))
+  (:documentation "Superclass for all errors related to external
+formats."))
+
+(define-condition external-format-simple-error (external-format-error simple-condition)
+  ()
+  (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting
+capabilities."))
+  
+(define-condition external-format-encoding-error (external-format-simple-error)
+  ()
+  (:documentation "Errors of this type are signalled if there is an
+encoding problem."))
+
+(defun signal-encoding-error (external-format format-control &rest format-args)
+  "Convenience function similar to ERROR to signal conditions of type
+EXTERNAL-FORMAT-ENCODING-ERROR."
+  (error 'external-format-encoding-error
+         :format-control format-control
+         :format-arguments format-args
+         :external-format external-format))
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.12 2008/05/20 23:01:50 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.16 2008/05/24 23:27:23 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -29,6 +29,125 @@
 
 (in-package :flexi-streams)
 
+(defgeneric compute-number-of-octets (format sequence start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Computes the exact number of octets required to
+encode the sequence of characters in SEQUENCE from START to END using
+the external format FORMAT."))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))  
+  (- end start))
+
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((< char-code #x80) 1)
+                               ((< char-code #x800) 2)
+                               ((< char-code #x10000) 3)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
+                               ((< char-code #x80) 1)
+                               ((< char-code #x800) 2)
+                               ((< char-code #x10000) 3)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+                               ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+                               ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))
+  (* 4 (- end start)))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (+ (call-next-method)
+     (* (case (external-format-name format)
+          (:utf-32 4)
+          (otherwise 1))
+        (count #\Newline sequence :start start :end end :test #'char=))))
+
 (defgeneric char-to-octets (format char writer)
   (declare #.*standard-optimize-settings*)
   (:documentation "Converts the character CHAR to a sequence of octets
@@ -37,72 +156,188 @@
 repeatedly each octet.  The return value of this function is
 unspecified."))
 
-(defmethod char-to-octets ((format flexi-latin-1-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((octet (char-code char)))
+(defgeneric write-sequence* (format stream sequence start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "A generic function which dispatches on the external
+format and does the real work for STREAM-WRITE-SEQUENCE."))
+
+(defgeneric string-to-octets* (format string start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "A generic function which dispatches on the external
+format and does the real work for STRING-TO-OCTETS."))
+
+(defmacro define-sequence-writers ((format-class) &body body)
+  "Utility macro which defines methods for WRITE-SEQUENCE* and
+STRING-TO-OCTET* for the class FORMAT-CLASS.  For BODY see the
+docstring of DEFINE-CHAR-ENCODERS."
+  `(progn
+     (defmethod write-sequence* ((format ,format-class) stream sequence start end)
+       (declare #.*standard-optimize-settings*)
+       (declare (fixnum start end))
+       (with-accessors ((column flexi-stream-column))
+           stream
+         (let* ((octet-seen-p nil)
+                (buffer-pos 0)
+                ;; estimate should be good enough...
+                (factor (encoding-factor format))
+                ;; we don't want arbitrarily large buffer, do we?
+                (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
+                (buffer (make-octet-buffer buffer-size)))
+           (declare (fixnum buffer-pos buffer-size)
+                    (boolean octet-seen-p)
+                    (type (array octet *) buffer))
+           (macrolet ((octet-writer (form)
+                        `(write-octet ,form)))
+             (labels ((flush-buffer ()
+                        "Sends all octets in BUFFER to the underlying stream."
+                        (write-sequence buffer stream :end buffer-pos)
+                        (setq buffer-pos 0))
+                      (write-octet (octet)
+                        "Adds one octet to the buffer and flushes it if necessary."
+                        (declare (octet octet))
+                        (when (>= buffer-pos buffer-size)
+                          (flush-buffer))
+                        (setf (aref buffer buffer-pos) octet)
+                        (incf buffer-pos))
+                      (write-object (object)
+                        "Dispatches to WRITE-OCTET or WRITE-CHARACTER
+depending on the type of OBJECT."
+                        (etypecase object
+                          (octet (setq octet-seen-p t)
+                                 (write-octet object))
+                          (character (symbol-macrolet ((char-getter object))
+                                       ,@body)))))
+               (macrolet ((iterate (&body output-forms)
+                            "An unhygienic macro to implement the actual
+iteration through SEQUENCE.  OUTPUT-FORM is the form to retrieve one
+sequence element and put its octet representation into the buffer."
+                            `(loop for index of-type fixnum from start below end
+                                   do (progn ,@output-forms)
+                                   finally (when (plusp buffer-pos)
+                                             (flush-buffer)))))
+                 (etypecase sequence
+                   (string (iterate
+                            (symbol-macrolet ((char-getter (char sequence index)))
+                              ,@body)))
+                   (array (iterate
+                           (symbol-macrolet ((char-getter (aref sequence index)))
+                             ,@body)))
+                   (list  (iterate (write-object (nth index sequence))))))
+               ;; update the column slot, setting it to NIL if we sent
+               ;; octets
+               (setq column
+                     (cond (octet-seen-p nil)
+                           (t (let ((last-newline-pos (position #\Newline sequence
+                                                                :test #'char=
+                                                                :start start
+                                                                :end end
+                                                                :from-end t)))
+                                (cond (last-newline-pos (- end last-newline-pos 1))
+                                      (column (+ column (- end start)))))))))))))  
+     (defmethod string-to-octets* ((format ,format-class) string start end)
+       (declare #.*standard-optimize-settings*)
+       (declare (fixnum start end) (string string))
+       (let ((octets (make-array (compute-number-of-octets format string start end)
+                                 :element-type 'octet))
+             (j 0))
+         (declare (fixnum j))
+         (loop for i of-type fixnum from start below end do
+               (macrolet ((octet-writer (form)
+                            `(progn
+                               (setf (aref (the (array octet *) octets) j) ,form)
+                               (incf j))))
+                 (symbol-macrolet ((char-getter (char string i)))
+                   (progn ,@body))))
+         octets))))
+
+;; char-getter can be called more than once - no side effects
+(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body)
+  "Utility macro which defines several encoding-related methods for
+the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where
+it is assumed that CR-FORMAT-CLASS is the same encoding as
+FORMAT-CLASS but with CR line endings and similar for
+CRLF-FORMAT-CLASS.  BODY is a code template for the code to convert
+one character to octets.  BODY must contain a symbol CHAR-GETTER
+representing the form which is used to obtain the character and a
+forms like \(OCTET-WRITE <thing>) to write the octet <thing>.  The
+CHAR-GETTER form might be called more than once."
+  (let ((body `((locally
+                  (declare #.*fixnum-optimize-settings*)
+                  ,@body))))
+    `(progn
+       (defmethod char-to-octets ((format ,format-class) char writer)
+         (declare (character char) (function writer))
+         (symbol-macrolet ((char-getter char))
+           (macrolet ((octet-writer (form)
+                        `(funcall writer ,form)))
+             ,@body)))
+       (define-sequence-writers (,format-class) ,@body)
+       (define-sequence-writers (,cr-format-class)
+         ,@(sublis `((char-getter . ,(with-unique-names (char)
+                                       `(let ((,char char-getter))
+                                          (declare (character ,char))
+                                          (if (char= ,char #\Newline)
+                                            #\Return
+                                            ,char)))))
+                   body))
+       (define-sequence-writers (,crlf-format-class)
+         ,(with-unique-names (char write-char)
+            `(flet ((,write-char (,char)
+                      ,@(sublis `((char-getter . ,char)) body)))
+               (let ((,char char-getter))
+                 (declare (character ,char))
+                 (cond ((char= ,char #\Newline)
+                        (,write-char #\Return)
+                        (,write-char #\Newline))
+                       (t (,write-char ,char))))))))))
+
+(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format  flexi-crlf-latin-1-format)
+  (let ((octet (char-code char-getter)))
     (when (> octet 255)
-      (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char octet))
-    (funcall writer octet)))
+      (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet))
+    (octet-writer octet)))
 
-(defmethod char-to-octets ((format flexi-ascii-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((octet (char-code char)))
+(define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
+  (let ((octet (char-code char-getter)))
     (when (> octet 127)
-      (signal-encoding-error format "~S (code ~A) is not an ASCII character." char octet))
-    (funcall writer octet)))
+      (signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet))
+    (octet-writer octet)))
 
-(defmethod char-to-octets ((format flexi-8-bit-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
+(define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
   (with-accessors ((encoding-hash external-format-encoding-hash))
       format
-    (let ((octet (gethash (char-code char) encoding-hash)))
+    (let ((octet (gethash (char-code char-getter) encoding-hash)))
       (unless octet
-        (signal-encoding-error format "~S (code ~A) is not in this encoding." char octet))
-      (funcall writer octet))))
+        (signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet))
+      (octet-writer octet))))
 
-(defmethod char-to-octets ((format flexi-utf-8-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((char-code (char-code char)))
+(define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
+  (let ((char-code (char-code char-getter)))
     (tagbody
      (cond ((< char-code #x80)
-            (funcall writer char-code)
+            (octet-writer char-code)
             (go zero))
            ((< char-code #x800)
-            (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code)))
+            (octet-writer (logior #b11000000 (ldb (byte 5 6) char-code)))
             (go one))
            ((< char-code #x10000)
-            (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code)))
+            (octet-writer (logior #b11100000 (ldb (byte 4 12) char-code)))
             (go two))
-           ((< char-code #x200000)
-            (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code)))
-            (go three))
-           ((< char-code #x4000000)
-            (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code)))
-            (go four))
-           (t (funcall writer (if (logbitp 30 char-code) #b11111101 #b11111100))))
-     (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code)))
-     four
-     (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code)))
-     three
-     (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code)))
+           (t
+            (octet-writer (logior #b11110000 (ldb (byte 3 18) char-code)))))
+     (octet-writer (logior #b10000000 (ldb (byte 6 12) char-code)))
      two
-     (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code)))
+     (octet-writer (logior #b10000000 (ldb (byte 6 6) char-code)))
      one
-     (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code)))
+     (octet-writer (logior #b10000000 (ldb (byte 6 0) char-code)))
      zero)))
 
-(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
+(define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
   (flet ((write-word (word)
-           (funcall writer (ldb (byte 8 0) word))
-           (funcall writer (ldb (byte 8 8) word))))
+           (octet-writer (ldb (byte 8 0) word))
+           (octet-writer (ldb (byte 8 8) word))))
     (declare (inline write-word))
-    (let ((char-code (char-code char)))
+    (let ((char-code (char-code char-getter)))
       (declare (type char-code-integer char-code))
       (cond ((< char-code #x10000)
              (write-word char-code))
@@ -110,14 +345,12 @@
                (write-word (logior #xd800 (ldb (byte 10 10) char-code)))
                (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
 
-(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
+(define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
   (flet ((write-word (word)
-           (funcall writer (ldb (byte 8 8) word))
-           (funcall writer (ldb (byte 8 0) word))))
+           (octet-writer (ldb (byte 8 8) word))
+           (octet-writer (ldb (byte 8 0) word))))
     (declare (inline write-word))
-    (let ((char-code (char-code char)))
+    (let ((char-code (char-code char-getter)))
       (declare (type char-code-integer char-code))
       (cond ((< char-code #x10000)
              (write-word char-code))
@@ -125,23 +358,19 @@
                (write-word (logior #xd800 (ldb (byte 10 10) char-code)))
                (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
 
-(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((char-code (char-code char)))
-    (funcall writer (ldb (byte 8 0) char-code))
-    (funcall writer (ldb (byte 8 8) char-code))
-    (funcall writer (ldb (byte 8 16) char-code))
-    (funcall writer (ldb (byte 8 24) char-code))))
-
-(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((char-code (char-code char)))
-    (funcall writer (ldb (byte 8 24) char-code))
-    (funcall writer (ldb (byte 8 16) char-code))
-    (funcall writer (ldb (byte 8 8) char-code))
-    (funcall writer (ldb (byte 8 0) char-code))))
+(define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
+  (let ((char-code (char-code char-getter)))
+    (octet-writer (ldb (byte 8 0) char-code))
+    (octet-writer (ldb (byte 8 8) char-code))
+    (octet-writer (ldb (byte 8 16) char-code))
+    (octet-writer (ldb (byte 8 24) char-code))))
+
+(define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
+  (let ((char-code (char-code char-getter)))
+    (octet-writer (ldb (byte 8 24) char-code))
+    (octet-writer (ldb (byte 8 16) char-code))
+    (octet-writer (ldb (byte 8 8) char-code))
+    (octet-writer (ldb (byte 8 0) char-code))))
 
 (defmethod char-to-octets ((format flexi-cr-mixin) char writer)
   (declare #.*fixnum-optimize-settings*)
Modified: branches/edi/output.lisp
==============================================================================
--- branches/edi/output.lisp	(original)
+++ branches/edi/output.lisp	Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.63 2008/05/23 14:43:09 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.65 2008/05/24 23:15:25 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -129,7 +129,7 @@
   ;; needed for AllegroCL - grrr...
   (stream-write-char stream #\Newline))
 
-(defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key)
+(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key)
   "An optimized version which uses a buffer underneath.  The function
 can accepts characters as well as octets and it decides what to do
 based on the element type of the sequence \(if possible) or on the
@@ -141,7 +141,7 @@
   (with-accessors ((column flexi-stream-column)
                    (external-format flexi-stream-external-format)
                    (stream flexi-stream-stream))
-      stream
+      flexi-output-stream
     (when (>= start end)
       (return-from stream-write-sequence sequence))
     (when (and (vectorp sequence)
@@ -151,59 +151,8 @@
       (setq column nil)
       (return-from stream-write-sequence
         (write-sequence sequence stream :start start :end end)))
-    (let* ((octet-seen-p nil)
-           (buffer-pos 0)
-           (factor (encoding-factor external-format))
-           (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
-           (buffer (make-octet-buffer buffer-size)))
-      (declare (fixnum buffer-pos buffer-size)
-               (boolean octet-seen-p)
-               (type (array octet *) buffer))
-      (labels ((flush-buffer ()
-                 "Sends all octets in BUFFER to the underlying stream."
-                 (write-sequence buffer stream :end buffer-pos)
-                 (setq buffer-pos 0))
-               (write-octet (octet)
-                 "Adds one octet to the buffer and flush it if necessary."
-                 (declare (octet octet))
-                 (when (>= buffer-pos buffer-size)
-                   (flush-buffer))
-                 (setf (aref buffer buffer-pos) octet)
-                 (incf buffer-pos))
-               (write-character (char)
-                 "Adds the octets representing the character CHAR to the buffer."
-                 (char-to-octets external-format char #'write-octet))
-               (write-object (object)
-                 "Dispatches to WRITE-OCTET or WRITE-CHARACTER
-depending on the type of OBJECT."
-                 (etypecase object
-                   (octet (setq octet-seen-p t)
-                          (write-octet object))
-                   (character (write-character object)))))
-        (declare (dynamic-extent (function write-octet)))
-        (macrolet ((iterate (output-form)
-                     "An unhygienic macro to implement the actual
-iteration through SEQUENCE.  OUTPUT-FORM is the form to retrieve one
-sequence element and put its octet representation into the buffer."
-                     `(loop for index of-type fixnum from start below end
-                            do ,output-form
-                            finally (when (plusp buffer-pos)
-                                      (flush-buffer)))))
-          (etypecase sequence
-            (string (iterate (write-character (char sequence index))))
-            (array (iterate (write-object (aref sequence index))))
-            (list (iterate (write-object (nth index sequence)))))
-          ;; update the column slot, setting it to NIL if we sent
-          ;; octets
-          (setq column
-                (cond (octet-seen-p nil)
-                      (t (let ((last-newline-pos (position #\Newline sequence
-                                                           :test #'char=
-                                                           :start start
-                                                           :end end
-                                                           :from-end t)))
-                           (cond (last-newline-pos (- end last-newline-pos 1))
-                                 (column (+ column (- end start))))))))))))          
+    ;; otherwise hand over to the external format to do the work
+    (write-sequence* external-format flexi-output-stream sequence start end))
   sequence)
 
 (defmethod stream-write-string ((stream flexi-output-stream) string
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp	(original)
+++ branches/edi/strings.lisp	Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.22 2008/05/21 01:43:43 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.24 2008/05/24 23:15:25 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -36,56 +36,10 @@
 octets corresponding to the external format designated by
 EXTERNAL-FORMAT."
   (declare #.*standard-optimize-settings*)
-  (declare (fixnum start end) (string string))
+  (declare (string string))
   (setq external-format (maybe-convert-external-format external-format))
-  (let ((factor (encoding-factor external-format))
-        (length (- end start)))
-    (declare (fixnum length))
-    (etypecase factor
-      (integer
-       (let ((octets (make-array (* factor length) :element-type 'octet))
-             (j 0))
-         (declare (fixnum j))
-         (flet ((writer (octet)
-                  (declare (octet octet))
-                  (setf (aref (the (array octet *) octets) j) octet)
-                  (incf j)))
-           (declare (dynamic-extent (function writer)))
-           (loop for i of-type fixnum from start below end do
-                 (char-to-octets external-format
-                                 (char string i)
-                                 #'writer)))
-         octets))
-      (double-float
-       ;; this is a bit clunky but hopefully a bit more efficient than
-       ;; using VECTOR-PUSH-EXTEND
-       (let* ((octets-length (ceiling (* factor length)))
-              (octets (make-array octets-length
-                                  :element-type 'octet
-                                  :fill-pointer t
-                                  :adjustable t))
-              (i start)
-              (j 0))
-         (declare (fixnum i j octets-length)
-                  (double-float factor))
-         (flet ((writer (octet)
-                  (declare (octet octet))
-                  (when (>= j octets-length)
-                    (setq factor (* factor 2.0d0))
-                    (incf octets-length (the fixnum (ceiling (* factor (- end i)))))
-                    (adjust-array octets octets-length :fill-pointer t))
-                  (setf (aref (the (array octet *) octets) j) octet)
-                  (incf j)))
-           (declare (dynamic-extent (function writer)))
-           (loop
-            (when (>= i end)
-              (return))
-            (char-to-octets external-format
-                            (char string i)
-                            #'writer)
-            (incf i))
-           (setf (fill-pointer octets) j)
-           octets))))))
+  ;; the external format knows how to do it...
+  (string-to-octets* external-format string start end))
 
 (defun octets-to-string (sequence &key
                                   (external-format :latin1)
Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp	(original)
+++ branches/edi/test/test.lisp	Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.31 2008/05/20 23:01:53 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.32 2008/05/21 17:51:42 edi Exp $
 
 ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -329,10 +329,11 @@
        (loop for i below (length seq1)
              always (eql (elt seq1 i) (elt seq2 i)))))
 
-(defun read-sequence-test (pathspec external-format)
-  "Several tests to confirm that READ-SEQUENCE behaves as expected."
-  (with-test ((format nil "READ-SEQUENCE tests with format ~S."
-                      (flex::normalize-external-format external-format)))
+(defun sequence-test (pathspec external-format)
+  "Several tests to confirm that READ-SEQUENCE and WRITE-SEQUENCE
+behave as expected."
+  (with-test ((format nil "Sequence tests with format ~S and file ~A."
+                      (flex::normalize-external-format external-format) pathspec))
     (let* ((full-path (merge-pathnames pathspec *this-file*))
            (file-string (file-as-string full-path external-format))
            (string-length (length file-string))
@@ -397,7 +398,33 @@
           (check (sequence-equal array (subseq file-string 25 (- string-length 25))))
           (check (sequence-equal (loop repeat 25
                                        collect (read-char in))
-                                 (subseq file-string (- string-length 25)))))))))
+                                 (subseq file-string (- string-length 25))))))
+      (let ((path-out (ensure-directories-exist (merge-pathnames pathspec *tmp-dir*))))
+        (with-open-file (out path-out
+                             :direction :output
+                             :if-exists :supersede
+                             :element-type 'octet)
+          (let ((out (make-flexi-stream out :external-format external-format)))
+            (write-sequence octets out)))
+        (check (file-equal full-path path-out))
+        (with-open-file (out path-out
+                             :direction :output
+                             :if-exists :supersede
+                             :element-type 'octet)
+          (let ((out (make-flexi-stream out :external-format external-format)))
+            (write-sequence file-string out)))
+        (check (file-equal full-path path-out))
+        (with-open-file (out path-out
+                             :direction :output
+                             :if-exists :supersede
+                             :element-type 'octet)
+          (let ((out (make-flexi-stream out :external-format external-format)))
+            (write-sequence file-string out :end 100)
+            (write-sequence octets out
+                            :start (length (string-to-octets file-string
+                                                             :external-format external-format
+                                                             :end 100)))))
+        (check (file-equal full-path path-out))))))
 
 (defmacro using-values ((&rest values) &body body)
   "Executes BODY and feeds an element from VALUES to the USE-VALUE
@@ -544,7 +571,7 @@
                                               nconc (create-test-combinations file-name symbols t))))
       (incf no-tests (length read-sequence-test-args-list))
       (dolist (args read-sequence-test-args-list)
-        (apply 'read-sequence-test args)))
+        (apply 'sequence-test args)))
     (incf no-tests)
     (error-handling-test)
     (incf no-tests)
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0
                            
                          
                          
                            
    
                          
                        
                    
                    
                        Author: eweitz
Date: Sat May 24 19:29:57 2008
New Revision: 52
Added:
   branches/edi/
      - copied from r51, trunk/
Log:
More needless optimization
                    
                  
                  
                          
                            
                            1
                            
                          
                          
                            
                            0