flexi-streams-cvs
Threads by month
- ----- 2025 -----
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
May 2008
- 2 participants
- 61 discussions

25 May '08
Author: eweitz
Date: Sun May 25 19:43:22 2008
New Revision: 61
Modified:
branches/edi/CHANGELOG
branches/edi/conditions.lisp
branches/edi/decode.lisp
branches/edi/doc/index.html
branches/edi/flexi-streams.asd
branches/edi/length.lisp
branches/edi/packages.lisp
branches/edi/strings.lisp
branches/edi/test/test.lisp
Log:
Ready for release
Modified: branches/edi/CHANGELOG
==============================================================================
--- branches/edi/CHANGELOG (original)
+++ branches/edi/CHANGELOG Sun May 25 19:43:22 2008
@@ -1,3 +1,10 @@
+Version 1.0.0
+2008-05-26
+More redesign for the sake of performance
+More checks for invalid data
+More tests
+Exported functions for length computation
+
Version 0.15.3
2008-05-23
Avoid CHANGE-CLASS on LispWorks if possible
Modified: branches/edi/conditions.lisp
==============================================================================
--- branches/edi/conditions.lisp (original)
+++ branches/edi/conditions.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.8 2008/05/25 03:07:58 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.9 2008/05/25 22:23:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -93,21 +93,11 @@
()
(:documentation "Superclass for all errors related to external
formats."))
-
-(define-condition external-format-warning (external-format-condition warning)
- ()
- (:documentation "Superclass for all warnings related to external
-formats."))
(define-condition external-format-encoding-error (external-format-error)
()
(:documentation "Errors of this type are signalled if there is an
encoding problem."))
-
-(define-condition external-format-encoding-warning (external-format-warning)
- ()
- (:documentation "Warnings of this type are signalled if there is an
-encoding problem."))
(defun signal-encoding-error (external-format format-control &rest format-args)
"Convenience function similar to ERROR to signal conditions of type
@@ -116,11 +106,3 @@
:format-control format-control
:format-arguments format-args
:external-format external-format))
-
-(defun signal-encoding-warning (external-format format-control &rest format-args)
- "Convenience function similar to WARN to signal conditions of type
-EXTERNAL-FORMAT-ENCODING-WARNING."
- (warn 'external-format-encoding-warning
- :format-control format-control
- :format-arguments format-args
- :external-format external-format))
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.26 2008/05/25 20:44:03 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.29 2008/05/25 23:19:19 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -202,7 +202,7 @@
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
(let* ((i start)
- (string-length (compute-number-of-chars format sequence start end nil))
+ (string-length (compute-number-of-chars format sequence start end))
(string (make-array string-length :element-type 'char*)))
(declare (fixnum i string-length))
(loop for j of-type fixnum from 0 below string-length
@@ -223,39 +223,46 @@
encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
BODY is a code template for the code to read octets and return one
-character. BODY must contain a symbol OCTET-GETTER representing the
-form which is used to obtain the next octet."
- `(progn
- (defmethod octets-to-char-code ((format ,lf-format-class) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
- (symbol-macrolet ((octet-getter (funcall reader)))
- ,@(sublis '((char-decoder . octets-to-char-code))
- body)))
- (define-sequence-readers (,lf-format-class) ,@body)
- (define-sequence-readers (,cr-format-class)
- ,(with-unique-names (char-code)
- `(let ((,char-code (progn ,@body)))
- (case ,char-code
- (#.+cr+ #.(char-code #\Newline))
- (otherwise ,char-code)))))
- (define-sequence-readers (,crlf-format-class)
- ,(with-unique-names (char-code next-char-code get-char-code)
- `(flet ((,get-char-code () ,@body))
- (let ((,char-code (,get-char-code)))
+character code. BODY must contain a symbol OCTET-GETTER representing
+the form which is used to obtain the next octet."
+ (let* ((body (with-unique-names (char-code)
+ `((let ((,char-code (progn ,@body)))
+ (when (and ,char-code
+ (or (<= #xd8 (logand* #x00ff (ash* ,char-code -8)) #xdf)
+ (> ,char-code #x10ffff)))
+ (recover-from-encoding-error format "Illegal code point ~A \(#x~:*~X)." ,char-code))
+ ,char-code)))))
+ `(progn
+ (defmethod octets-to-char-code ((format ,lf-format-class) reader)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (function reader))
+ (symbol-macrolet ((octet-getter (funcall reader)))
+ ,@(sublis '((char-decoder . octets-to-char-code))
+ body)))
+ (define-sequence-readers (,lf-format-class) ,@body)
+ (define-sequence-readers (,cr-format-class)
+ ,(with-unique-names (char-code)
+ `(let ((,char-code (progn ,@body)))
(case ,char-code
- (#.+cr+
- (let ((,next-char-code (,get-char-code)))
- (case ,next-char-code
- (#.+lf+ #.(char-code #\Newline))
- ;; we saw a CR but no LF afterwards, but then the data
- ;; ended, so we just return #\Return
- ((nil) +cr+)
- ;; if the character we peeked at wasn't a
- ;; linefeed character we unread its constituents
- (otherwise (unget (code-char ,next-char-code))
- ,char-code))))
- (otherwise ,char-code))))))))
+ (#.+cr+ #.(char-code #\Newline))
+ (otherwise ,char-code)))))
+ (define-sequence-readers (,crlf-format-class)
+ ,(with-unique-names (char-code next-char-code get-char-code)
+ `(flet ((,get-char-code () ,@body))
+ (let ((,char-code (,get-char-code)))
+ (case ,char-code
+ (#.+cr+
+ (let ((,next-char-code (,get-char-code)))
+ (case ,next-char-code
+ (#.+lf+ #.(char-code #\Newline))
+ ;; we saw a CR but no LF afterwards, but then the data
+ ;; ended, so we just return #\Return
+ ((nil) +cr+)
+ ;; if the character we peeked at wasn't a
+ ;; linefeed character we unread its constituents
+ (otherwise (unget (code-char ,next-char-code))
+ ,char-code))))
+ (otherwise ,char-code)))))))))
(define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
octet-getter)
@@ -296,25 +303,28 @@
(multiple-value-bind (start count)
(cond ((not (logbitp 7 octet))
(values octet 0))
- ((= #b11000000 (logand octet #b11100000))
- (values (logand octet #b00011111) 1))
- ((= #b11100000 (logand octet #b11110000))
- (values (logand octet #b00001111) 2))
- ((= #b11110000 (logand octet #b11111000))
- (values (logand octet #b00000111) 3))
+ ((= #b11000000 (logand* octet #b11100000))
+ (when (= #b11000000 (logand* octet #b11111110))
+ (return-from char-decoder
+ (recover-from-encoding-error format
+ "Illegal value #x~X leads to `overlong' UTF-8 sequence."
+ octet)))
+ (values (logand* octet #b00011111) 1))
+ ((= #b11100000 (logand* octet #b11110000))
+ (values (logand* octet #b00001111) 2))
+ ((= #b11110000 (logand* octet #b11111000))
+ (values (logand* octet #b00000111) 3))
(t (return-from char-decoder
(recover-from-encoding-error format
"Unexpected value #x~X at start of UTF-8 sequence."
octet))))
(declare (fixnum count))
- ;; note that we currently don't check for "overlong"
- ;; sequences or other illegal values
(loop for result of-type code-point
- = start then (+ (ash result 6)
- (logand octet #b111111))
+ = start then (+ (ash* result 6)
+ (logand* octet #b111111))
repeat count
for octet of-type octet = (read-next-byte)
- unless (= #b10000000 (logand octet #b11000000))
+ unless (= #b10000000 (logand* octet #b11000000))
do (return-from char-decoder
(recover-from-encoding-error format
"Unexpected value #x~X in UTF-8 sequence." octet))
@@ -334,7 +344,7 @@
(setq first-octet-seen t))))
(flet ((read-next-word ()
(+ (the octet (read-next-byte))
- (ash (the octet (read-next-byte)) 8))))
+ (ash* (the octet (read-next-byte)) 8))))
(declare (inline read-next-word))
(let ((word (read-next-word)))
(declare (type (unsigned-byte 16) word))
@@ -346,8 +356,8 @@
(recover-from-encoding-error format
"Unexpected UTF-16 word #x~X following #x~X."
next-word word)))
- (+ (ash (logand #b1111111111 word) 10)
- (logand #b1111111111 next-word)
+ (+ (ash* (logand* #b1111111111 word) 10)
+ (logand* #b1111111111 next-word)
#x10000)))
(t word)))))))
@@ -364,7 +374,7 @@
(t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(flet ((read-next-word ()
- (+ (ash (the octet (read-next-byte)) 8)
+ (+ (ash* (the octet (read-next-byte)) 8)
(the octet (read-next-byte)))))
(declare (inline read-next-word))
(let ((word (read-next-word)))
@@ -377,8 +387,8 @@
(recover-from-encoding-error format
"Unexpected UTF-16 word #x~X following #x~X."
next-word word)))
- (+ (ash (logand #b1111111111 word) 10)
- (logand #b1111111111 next-word)
+ (+ (ash* (logand* #b1111111111 word) 10)
+ (logand* #b1111111111 next-word)
#x10000)))
(t word)))))))
@@ -396,7 +406,7 @@
(setq first-octet-seen t))))
(loop for count of-type fixnum from 0 to 24 by 8
for octet of-type octet = (read-next-byte)
- sum (ash octet count)))))
+ sum (ash* octet count)))))
(define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
(let (first-octet-seen)
@@ -412,7 +422,7 @@
(setq first-octet-seen t))))
(loop for count of-type fixnum from 24 downto 0 by 8
for octet of-type octet = (read-next-byte)
- sum (ash octet count)))))
+ sum (ash* octet count)))))
(defmethod octets-to-char-code ((format flexi-cr-mixin) reader)
(declare #.*fixnum-optimize-settings*)
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html (original)
+++ branches/edi/doc/index.html Sun May 25 19:43:22 2008
@@ -72,7 +72,6 @@
<li><a href="#external-format-condition"><code>external-format-condition</code></a>
<li><a href="#external-format-condition-external-format"><code>external-format-condition-external-format</code></a>
<li><a href="#external-format-error"><code>external-format-error</code></a>
- <li><a href="#external-format-warning"><code>external-format-warning</code></a>
<li><a href="#external-format-encoding-error"><code>external-format-encoding-error</code></a>
<li><a href="#*substitution-char*"><code>*substitution-char*</code></a>
</ol>
@@ -229,7 +228,7 @@
<p>
FLEXI-STREAMS together with this documentation can be downloaded from <a
href="http://weitz.de/files/flexi-streams.tar.gz">http://weitz.de/files/flexi-streams.tar.gz</a>. The
-current version is 0.15.3.
+current version is 1.0.0.
<p>
Before you install FLEXI-STREAMS you first need to
install the <a
@@ -548,14 +547,6 @@
</blockquote>
<p><br>[Condition]
-<br><a class=none name="external-format-warning"><b>external-format-warning</b></a>
-
-<blockquote><br>
-All warnings related to <a href="#external-formats">external formats</a> are of this type.
-This is a subtype of <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>.
-</blockquote>
-
-<p><br>[Condition]
<br><a class=none name="external-format-error"><b>external-format-error</b></a>
<blockquote><br>
@@ -1063,7 +1054,7 @@
The defaults for
<code><i>start</i></code> and <code><i>end</i></code>
are <code>0</code> and the length of the sequence. The default
-for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+for <code><i>external-format</i></code> is <code>:LATIN1</code>. Note that this function doesn't check for the validity of the data in <code><i>sequence</i></code>.
<p>
This function is optimized for the case
of <code><i>sequence</i></code> being
@@ -1110,7 +1101,7 @@
his work on making FLEXI-STREAMS faster.
<p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.119 2008/05/25 23:42:30 edi Exp $
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: branches/edi/flexi-streams.asd
==============================================================================
--- branches/edi/flexi-streams.asd (original)
+++ branches/edi/flexi-streams.asd Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.71 2008/05/25 23:42:28 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -35,7 +35,7 @@
(in-package :flexi-streams-system)
(defsystem :flexi-streams
- :version "0.15.3"
+ :version "1.0.0"
:serial t
:components ((:file "packages")
(:file "mapping")
Modified: branches/edi/length.lisp
==============================================================================
--- branches/edi/length.lisp (original)
+++ branches/edi/length.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.4 2008/05/25 22:23:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -72,51 +72,50 @@
;; the estimate unexact
(* 1.02d0 (call-next-method)))
-(defgeneric check-end (format start end i warnp)
+(defgeneric check-end (format start end i)
(declare #.*fixnum-optimize-settings*)
(:documentation "Helper function used below to determine if we tried
to read past the end of the sequence.")
- (:method (format start end i warnp)
+ (:method (format start end i)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end i))
- (when (and warnp (> i end))
- (signal-encoding-warning format "These ~A octet~:P can't be ~
+ (when (> i end)
+ (signal-encoding-error format "These ~A octet~:P can't be ~
decoded using ~A as the sequence is too short. ~A octet~:P missing ~
at then end."
- (- end start)
- (external-format-name format)
- (- i end))))
- (:method ((format flexi-utf-16-format) start end i warnp)
+ (- end start)
+ (external-format-name format)
+ (- i end))))
+ (:method ((format flexi-utf-16-format) start end i)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end i))
- (declare (ignore i warnp))
+ (declare (ignore i))
;; don't warn twice
(when (evenp (- end start))
(call-next-method))))
-(defgeneric compute-number-of-chars (format sequence start end warnp)
+(defgeneric compute-number-of-chars (format sequence start end)
(declare #.*standard-optimize-settings*)
(:documentation "Computes the exact number of characters required to
decode the sequence of octets in SEQUENCE from START to END using the
-external format FORMAT. If WARNP is NIL, warnings will be muffled."))
+external format FORMAT."))
-(defmethod compute-number-of-chars :around (format (list list) start end warnp)
+(defmethod compute-number-of-chars :around (format (list list) start end)
(declare #.*standard-optimize-settings*)
- (call-next-method format (coerce list 'vector) start end warnp))
+ (call-next-method format (coerce list 'vector) start end))
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence warnp))
+ (declare (ignore sequence))
(- end start))
-(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end)
;; this method only applies to the 8-bit formats as all other
;; formats with CRLF line endings have their own specialized methods
;; below
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
- (declare (ignore warnp))
(let ((i start)
(length (- end start)))
(declare (fixnum i length))
@@ -130,7 +129,7 @@
(decf length)))
length))
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -140,17 +139,18 @@
(when (>= i end)
(return))
(let* ((octet (aref sequence i))
+ ;; note that there are no validity checks here
(length (cond ((not (logbitp 7 octet)) 1)
- ((= #b11000000 (logand octet #b11100000)) 2)
- ((= #b11100000 (logand octet #b11110000)) 3)
+ ((= #b11000000 (logand* octet #b11100000)) 2)
+ ((= #b11100000 (logand* octet #b11110000)) 3)
(t 4))))
(declare (fixnum length) (type octet octet))
(incf sum)
(incf i length)))
- (check-end format start end i warnp)
+ (check-end format start end i)
sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -161,28 +161,29 @@
(when (>= i end)
(return))
(let* ((octet (aref sequence i))
+ ;; note that there are no validity checks here
(length (cond ((not (logbitp 7 octet)) 1)
- ((= #b11000000 (logand octet #b11100000)) 2)
- ((= #b11100000 (logand octet #b11110000)) 3)
+ ((= #b11000000 (logand* octet #b11100000)) 2)
+ ((= #b11100000 (logand* octet #b11110000)) 3)
(t 4))))
(declare (fixnum length) (type octet octet))
(unless (and (= octet +lf+) (= last-octet +cr+))
(incf sum))
(incf i length)
(setq last-octet octet)))
- (check-end format start end i warnp)
+ (check-end format start end i)
sum))
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(declare (ignore sequence))
- (when (and warnp (oddp (- end start)))
- (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+ (when (oddp (- end start))
+ (signal-encoding-error format "~A octet~:P cannot be decoded ~
using UTF-16 as ~:*~A is not even."
- (- end start))))
+ (- end start))))
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((sum 0)
@@ -198,10 +199,10 @@
(declare (fixnum length) (type octet high-octet))
(incf sum)
(incf i length)))
- (check-end format start (+ end 2) i warnp)
+ (check-end format start (+ end 2) i)
sum))
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -217,10 +218,10 @@
(declare (fixnum length) (type octet high-octet))
(incf sum)
(incf i length)))
- (check-end format start (+ end 2) i warnp)
+ (check-end format start (+ end 2) i)
sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -243,10 +244,10 @@
(aref sequence i)
0))
(incf i length)))
- (check-end format start (+ end 2) i warnp)
+ (check-end format start (+ end 2) i)
sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -269,29 +270,28 @@
(aref sequence (1+ i))
0))
(incf i length)))
- (check-end format start (+ end 2) i warnp)
+ (check-end format start (+ end 2) i)
sum))
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(declare (ignore sequence))
(let ((length (- end start)))
- (when (and warnp (plusp (mod length 4)))
- (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+ (when (plusp (mod length 4))
+ (signal-encoding-error format "~A octet~:P cannot be decoded ~
using UTF-32 as ~:*~A is not a multiple-value of four."
- length))))
+ length))))
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence warnp))
+ (declare (ignore sequence))
(ceiling (- end start) 4))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
- (declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
(decf end 8)
@@ -306,10 +306,9 @@
(t (incf i 4))))
length))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
- (declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
(decf end 8)
Modified: branches/edi/packages.lisp
==============================================================================
--- branches/edi/packages.lisp (original)
+++ branches/edi/packages.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.37 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.38 2008/05/25 22:23:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -46,12 +46,10 @@
:external-format-eol-style
:external-format-error
:external-format-encoding-error
- :external-format-encoding-warning
:external-format-equal
:external-format-id
:external-format-little-endian
:external-format-name
- :external-format-warning
:flexi-input-stream
:flexi-output-stream
:flexi-io-stream
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp (original)
+++ branches/edi/strings.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.32 2008/05/25 23:09:13 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -72,11 +72,12 @@
(defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence)))
"Kind of the inverse of OCTET-LENGTH. Returns the length of the
subsequence \(of octets) of SEQUENCE from START to END in characters
-if decoded using the external format EXTERNAL-FORMAT.
+if decoded using the external format EXTERNAL-FORMAT. Note that this
+function doesn't check for the validity of the data in SEQUENCE.
This function is optimized for the case of SEQUENCE being a vector.
Don't use lists if you're in a hurry."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
(setq external-format (maybe-convert-external-format external-format))
- (compute-number-of-chars external-format sequence start end t))
+ (compute-number-of-chars external-format sequence start end))
Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp (original)
+++ branches/edi/test/test.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.33 2008/05/25 03:08:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.35 2008/05/25 23:10:47 edi Exp $
;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,6 +29,48 @@
(in-package :flexi-streams-test)
+(defmacro with-test ((test-description) &body body)
+ "Defines a test. Two utilities are available inside of the body of
+the maco: The function FAIL, and the macro CHECK. FAIL, the lowest
+level utility, marks the test defined by WITH-TEST as failed. CHECK
+checks whether its argument is true, otherwise it calls FAIL. If
+during evaluation of the specified expression any condition is
+signalled, this is also considered a failure.
+
+WITH-TEST prints reports while the tests run. It also increments
+*TEST-SUCCESS-COUNT* if a test completes successfully."
+ (flex::with-unique-names (successp)
+ `(let ((,successp t))
+ (flet ((fail (format-str &rest format-args)
+ (setf ,successp nil)
+ (apply #'format *error-output* format-str format-args)))
+ (macrolet ((check (expression)
+ `(handler-case
+ (unless ,expression
+ (fail "Expression ~S failed.~%" ',expression))
+ (error (c)
+ (fail "Expression ~S failed signalling error of type ~A: ~A.~%"
+ ',expression (type-of c) c))))
+ (with-expected-error ((condition-type) &body body)
+ `(handler-case (progn ,@body)
+ (,condition-type () t)
+ (:no-error (&rest args)
+ (declare (ignore args))
+ (fail "Expected condition ~S not signalled~%"
+ ',condition-type)))))
+ (format *error-output* "Test ~S~%" ,test-description)
+ ,@body
+ (if ,successp
+ (incf *test-success-counter*)
+ (format *error-output* " Test failed!!!~%"))
+ (terpri *error-output*)
+ (terpri *error-output*))
+ ,successp))))
+
+;; LW can't indent this correctly because it's in a MACROLET
+#+:lispworks
+(editor:setup-indent "with-expected-error" 1 2 4)
+
(defconstant +buffer-size+ 8192
"Size of buffers for COPY-STREAM* below.")
@@ -245,37 +287,6 @@
(setf (fill-pointer string) (read-sequence string in))
string)))
-(defmacro with-test ((test-description) &body body)
- "Defines a test. Two utilities are available inside of the body of
-the maco: The function FAIL, and the macro CHECK. FAIL, the lowest
-level utility, marks the test defined by WITH-TEST as failed. CHECK
-checks whether its argument is true, otherwise it calls FAIL. If
-during evaluation of the specified expression any condition is
-signalled, this is also considered a failure.
-
-WITH-TEST prints reports while the tests run. It also increments
-*TEST-SUCCESS-COUNT* if a test completes successfully."
- (flex::with-unique-names (successp)
- `(let ((,successp t))
- (flet ((fail (format-str &rest format-args)
- (setf ,successp nil)
- (apply #'format *error-output* format-str format-args)))
- (macrolet ((check (expression)
- `(handler-case
- (unless ,expression
- (fail "Expression ~S failed.~%" ',expression))
- (error (c)
- (fail "Expression ~S failed signalling error of type ~A: ~A.~%"
- ',expression (type-of c) c)))))
- (format *error-output* "Test ~S~%" ,test-description)
- ,@body
- (if ,successp
- (incf *test-success-counter*)
- (format *error-output* " Test failed!!!~%"))
- (terpri *error-output*)
- (terpri *error-output*))
- ,successp))))
-
(defun old-string-to-octets (string &key
(external-format (make-external-format :latin1))
(start 0) end)
@@ -460,7 +471,51 @@
(defun error-handling-test ()
"Tests several possible errors and how they are handled."
- (with-test ("Handling of errors.")
+ (with-test ("Illegal values.")
+ (macrolet ((want-encoding-error (input format)
+ `(with-expected-error (external-format-encoding-error)
+ (read-flexi-line* ,input ,format))))
+ ;; "overlong"
+ (want-encoding-error #(#b11000000) :utf-8)
+ (want-encoding-error #(#b11000001) :utf-8)
+ ;; examples of invalid lead octets
+ (want-encoding-error #(#b11111000) :utf-8)
+ (want-encoding-error #(#b11111001) :utf-8)
+ (want-encoding-error #(#b11111100) :utf-8)
+ (want-encoding-error #(#b11111101) :utf-8)
+ (want-encoding-error #(#b11111110) :utf-8)
+ (want-encoding-error #(#b11111111) :utf-8)
+ ;; illegal code points
+ (want-encoding-error #(#x00 #x00 #x11 #x00) :utf-32le)
+ (want-encoding-error #(#x00 #xd8) :utf-16le)
+ (want-encoding-error #(#xff #xdf) :utf-16le)))
+ (with-test ("Illegal lengths.")
+ (macrolet ((want-encoding-error (input format)
+ `(with-expected-error (external-format-encoding-error)
+ (read-flexi-line* ,input ,format))))
+ ;; UTF-8 sequences which are too short
+ (want-encoding-error #(#xe4 #xf6 #xfc) :utf8)
+ (want-encoding-error #(#xc0) :utf8)
+ (want-encoding-error #(#xe0 #xff) :utf8)
+ (want-encoding-error #(#xf0 #xff #xff) :utf8)
+ ;; UTF-16 wants an even number of octets
+ (want-encoding-error #(#x01) :utf-16le)
+ (want-encoding-error #(#x01 #x01 #x01) :utf-16le)
+ (want-encoding-error #(#x01) :utf-16be)
+ (want-encoding-error #(#x01 #x01 #x01) :utf-16be)
+ ;; another word should follow but it doesn't
+ (want-encoding-error #(#x01 #xd8) :utf-16le)
+ (want-encoding-error #(#xd8 #x01) :utf-16be)
+ ;; UTF-32 always wants four octets
+ (want-encoding-error #(#x01) :utf-32le)
+ (want-encoding-error #(#x01 #x01) :utf-32le)
+ (want-encoding-error #(#x01 #x01 #x01) :utf-32le)
+ (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32le)
+ (want-encoding-error #(#x01) :utf-32be)
+ (want-encoding-error #(#x01 #x01) :utf-32be)
+ (want-encoding-error #(#x01 #x01 #x01) :utf-32be)
+ (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32be)))
+ (with-test ("Errors while decoding and substitution of characters.")
;; handling of EOF in the middle of CRLF
(check (string= #.(string #\Return)
(read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf))))
@@ -472,11 +527,7 @@
(check (string= "a??" (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253)))
(check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))
;; not a valid UTF-8 sequence
- (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))
- (check (string= "?" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))
- ;; UTF-8 can't start neither with #b11111110 nor with #b11111111
- (check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8)))
- (check (string= "?" (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
+ (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))))
(let ((*substitution-char* nil))
;; :ASCII doesn't have characters with char codes > 127
(check (string= "abc" (using-values (#\b #\c)
@@ -490,16 +541,12 @@
(read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))))
;; not a valid UTF-8 sequence
(check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))))
- (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))))
;; UTF-8 can't start neither with #b11111110 nor with #b11111111
(check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8))))
- (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
;; only one byte
(check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le))))
- (check (string= "" (read-flexi-line* #(#x01) :utf-16le)))
;; two bytes, but value of resulting word suggests that another word follows
(check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le))))
- (check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le))))
;; the second word must fit into the [#xdc00; #xdfff] interval, but it is #xdbff
(check (string= "T" (using-values (#\T) (read-flexi-line '(#x01 #xd8 #xff #xdb) :utf-16le))))
(check (string= "T" (using-values (#\T) (read-flexi-line* #(#x01 #xd8 #xff #xdb) :utf-16le))))
@@ -507,11 +554,10 @@
(check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be))))
(check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be))))
(check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be))))
- (check (string= "" (read-flexi-line* #(#x01) :utf-16be)))
- (check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be))))
(check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be))))
- ;; the only case when error is signalled for UTF-32 is at end of file
- ;; in the middle of 4-byte sequence, both for big and little endian
+ ;; the only case when errors are signalled for UTF-32 is at end
+ ;; of file in the middle of 4-byte sequence, both for big and
+ ;; little endian
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01) :utf-32le))))
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32le))))
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32le))))
@@ -521,17 +567,7 @@
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32be))))
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32be))))
(check (string= "aY" (using-values (#\Y)
- (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32le))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32le))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32le))))
- (check (string= "aY" (using-values (#\Y)
- (read-flexi-line* `#(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32be))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32be))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32be))))
- (check (string= "aY" (using-values (#\Y)
- (read-flexi-line* `#(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))))))
+ (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))))))
(defun unread-char-test ()
"Tests whether UNREAD-CHAR behaves as expected."
@@ -572,7 +608,7 @@
(incf no-tests (length read-sequence-test-args-list))
(dolist (args read-sequence-test-args-list)
(apply 'sequence-test args)))
- (incf no-tests)
+ (incf no-tests 3)
(error-handling-test)
(incf no-tests)
(unread-char-test)
1
0
Author: eweitz
Date: Sun May 25 17:36:37 2008
New Revision: 60
Modified:
branches/edi/encode.lisp
branches/edi/util.lisp
Log:
Help some Lisps optimize the encoding functions
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp (original)
+++ branches/edi/encode.lisp Sun May 25 17:36:37 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.22 2008/05/25 20:44:03 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.23 2008/05/25 21:26:12 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -203,65 +203,67 @@
(octet-writer octet))))
(define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
+ ;; the old version using LDB was more elegant, but some Lisps had
+ ;; trouble optimizing it
(let ((char-code (char-code char-getter)))
(tagbody
(cond ((< char-code #x80)
(octet-writer char-code)
(go zero))
((< char-code #x800)
- (octet-writer (logior #b11000000 (ldb (byte 5 6) char-code)))
+ (octet-writer (logior* #b11000000 (ash* char-code -6)))
(go one))
((< char-code #x10000)
- (octet-writer (logior #b11100000 (ldb (byte 4 12) char-code)))
+ (octet-writer (logior* #b11100000 (ash* char-code -12)))
(go two))
(t
- (octet-writer (logior #b11110000 (ldb (byte 3 18) char-code)))))
- (octet-writer (logior #b10000000 (ldb (byte 6 12) char-code)))
+ (octet-writer (logior* #b11110000 (ash* char-code -18)))))
+ (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -12))))
two
- (octet-writer (logior #b10000000 (ldb (byte 6 6) char-code)))
+ (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -6))))
one
- (octet-writer (logior #b10000000 (ldb (byte 6 0) char-code)))
+ (octet-writer (logior* #b10000000 (logand* #b00111111 char-code)))
zero)))
(define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
(flet ((write-word (word)
- (octet-writer (ldb (byte 8 0) word))
- (octet-writer (ldb (byte 8 8) word))))
+ (octet-writer (logand* #x00ff word))
+ (octet-writer (ash* (logand* #xff00 word) -8))))
(declare (inline write-word))
(let ((char-code (char-code char-getter)))
(declare (type char-code-integer char-code))
(cond ((< char-code #x10000)
(write-word char-code))
(t (decf char-code #x10000)
- (write-word (logior #xd800 (ldb (byte 10 10) char-code)))
- (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
+ (write-word (logior* #xd800 (ash* char-code -10)))
+ (write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
(define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
(flet ((write-word (word)
- (octet-writer (ldb (byte 8 8) word))
- (octet-writer (ldb (byte 8 0) word))))
+ (octet-writer (ash* (logand* #xff00 word) -8))
+ (octet-writer (logand* #x00ff word))))
(declare (inline write-word))
(let ((char-code (char-code char-getter)))
(declare (type char-code-integer char-code))
(cond ((< char-code #x10000)
(write-word char-code))
(t (decf char-code #x10000)
- (write-word (logior #xd800 (ldb (byte 10 10) char-code)))
- (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
+ (write-word (logior* #xd800 (ash* char-code -10)))
+ (write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
(define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
(let ((char-code (char-code char-getter)))
- (octet-writer (ldb (byte 8 0) char-code))
- (octet-writer (ldb (byte 8 8) char-code))
- (octet-writer (ldb (byte 8 16) char-code))
- (octet-writer (ldb (byte 8 24) char-code))))
+ (octet-writer (logand* #x00ff char-code))
+ (octet-writer (logand* #x00ff (ash* char-code -8)))
+ (octet-writer (logand* #x00ff (ash* char-code -16)))
+ (octet-writer (logand* #x00ff (ash* char-code -24)))))
(define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
(let ((char-code (char-code char-getter)))
- (octet-writer (ldb (byte 8 24) char-code))
- (octet-writer (ldb (byte 8 16) char-code))
- (octet-writer (ldb (byte 8 8) char-code))
- (octet-writer (ldb (byte 8 0) char-code))))
+ (octet-writer (logand* #x00ff (ash* char-code -24)))
+ (octet-writer (logand* #x00ff (ash* char-code -16)))
+ (octet-writer (logand* #x00ff (ash* char-code -8)))
+ (octet-writer (logand* #x00ff char-code))))
(defmethod char-to-octets ((format flexi-cr-mixin) char writer)
(declare #.*fixnum-optimize-settings*)
Modified: branches/edi/util.lisp
==============================================================================
--- branches/edi/util.lisp (original)
+++ branches/edi/util.lisp Sun May 25 17:36:37 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.23 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.24 2008/05/25 21:26:12 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -192,4 +192,16 @@
"Tries to `rewind' the \(binary) stream STREAM by OCTETS octets.
Returns a true value if it succeeds."
(when-let (position (file-position stream))
- (file-position stream (- position octets))))
\ No newline at end of file
+ (file-position stream (- position octets))))
+
+(defmacro logand* (x y)
+ "Solely for optimization purposes. Some Lisps need it, some don't."
+ `(the fixnum (logand ,x ,y)))
+
+(defmacro logior* (x y)
+ "Solely for optimization purposes. Some Lisps need it, some don't."
+ `(the fixnum (logior ,x ,y)))
+
+(defmacro ash* (integer count)
+ "Solely for optimization purposes. Some Lisps need it, some don't."
+ `(the fixnum (ash ,integer ,count)))
1
0
Author: eweitz
Date: Sun May 25 16:45:09 2008
New Revision: 59
Modified:
branches/edi/decode.lisp
branches/edi/encode.lisp
Log:
ANSI compliance fix
Tests pass on ClozureCL and AllegroCL now
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sun May 25 16:45:09 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.25 2008/05/25 20:26:34 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.26 2008/05/25 20:44:03 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -67,7 +67,7 @@
(defmethod octets-to-string* :around (format (list list) start end)
(declare #.*standard-optimize-settings*)
- (call-next-method format (coerce list 'vector) start end))
+ (octets-to-string* format (coerce list 'vector) start end))
(defmacro define-sequence-readers ((format-class) &body body)
"Non-hygienic utility macro which defines methods for READ-SEQUENCE*
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp (original)
+++ branches/edi/encode.lisp Sun May 25 16:45:09 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.21 2008/05/25 20:26:34 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.22 2008/05/25 20:44:03 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -49,7 +49,7 @@
(defmethod string-to-octets* :around (format (list list) start end)
(declare #.*standard-optimize-settings*)
- (call-next-method format (coerce list 'string*) start end))
+ (string-to-octets* format (coerce list 'string*) start end))
(defmacro define-sequence-writers ((format-class) &body body)
"Non-hygienic utility macro which defines methods for
1
0
Author: eweitz
Date: Sun May 25 16:28:25 2008
New Revision: 58
Modified:
branches/edi/decode.lisp
branches/edi/doc/index.html
branches/edi/encode.lisp
branches/edi/input.lisp
branches/edi/length.lisp
branches/edi/mapping.lisp
branches/edi/strings.lisp
Log:
Optimized the other direction as well
Passes tests on LispWorks
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.25 2008/05/25 20:26:34 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -60,26 +60,217 @@
The special variable *CURRENT-UNREADER* must be bound correctly
whenever this function is called."))
-(defmethod octets-to-char-code ((format flexi-latin-1-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
- (funcall reader))
+(defgeneric octets-to-string* (format sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "A generic function which dispatches on the external
+format and does the real work for OCTETS-TO-STRING."))
-(defmethod octets-to-char-code ((format flexi-ascii-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
- (when-let (octet (funcall reader))
+(defmethod octets-to-string* :around (format (list list) start end)
+ (declare #.*standard-optimize-settings*)
+ (call-next-method format (coerce list 'vector) start end))
+
+(defmacro define-sequence-readers ((format-class) &body body)
+ "Non-hygienic utility macro which defines methods for READ-SEQUENCE*
+and OCTETS-TO-STRING* for the class FORMAT-CLASS. BODY is described
+in the docstring of DEFINE-CHAR-ENCODERS but can additionally contain
+a form (UNGET <form>) which has to be replaced by the correct code to
+`unread' the octets for the character designated by <form>."
+ (let* ((body `((block char-decoder
+ (locally
+ (declare #.*fixnum-optimize-settings*)
+ ,@body)))))
+ `(progn
+ (defmethod read-sequence* ((format ,format-class) flexi-input-stream sequence start end)
+ (with-accessors ((position flexi-stream-position)
+ (bound flexi-stream-bound)
+ (octet-stack flexi-stream-octet-stack)
+ (last-octet flexi-stream-last-octet)
+ (last-char-code flexi-stream-last-char-code)
+ (stream flexi-stream-stream))
+ flexi-input-stream
+ (let* (buffer
+ (buffer-pos 0)
+ (buffer-end 0)
+ (index start)
+ ;; whether we will later be able to rewind the stream if
+ ;; needed (to get rid of unused octets in the buffer)
+ (can-rewind-p (maybe-rewind stream 0))
+ (factor (encoding-factor format))
+ (integer-factor (floor factor))
+ ;; it's an interesting question whether it makes sense
+ ;; performance-wise to make RESERVE significantly bigger
+ ;; (and thus put potentially a lot more octets into
+ ;; OCTET-STACK), especially for UTF-8
+ (reserve (cond ((not (floatp factor)) 0)
+ ((not can-rewind-p) (* 2 integer-factor))
+ (t (ceiling (* (- factor integer-factor) (- end start)))))))
+ (declare (fixnum buffer-pos buffer-end index integer-factor reserve)
+ (boolean can-rewind-p))
+ (flet ((compute-fill-amount ()
+ "Computes the amount of octets we can savely read into
+the buffer without violating the stream's bound \(if there is one) and
+without potentially reading much more than we need \(unless we can
+rewind afterwards)."
+ (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor
+ (the fixnum (- end index))))
+ reserve))
+ +buffer-size+)))
+ (cond (bound (min minimum (- bound position)))
+ (t minimum))))
+ (fill-buffer (end)
+ "Tries to fill the buffer from BUFFER-POS to END and
+returns NIL if the buffer doesn't contain any new data."
+ ;; put data from octet stack into buffer if there is any
+ (loop
+ (when (>= buffer-pos end)
+ (return))
+ (let ((next-octet (pop octet-stack)))
+ (cond (next-octet
+ (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet))
+ (incf buffer-pos))
+ (t (return)))))
+ (setq buffer-end (read-sequence buffer stream
+ :start buffer-pos
+ :end end))
+ ;; BUFFER-POS is only greater than zero if the buffer
+ ;; already contains unread data from the octet stack
+ ;; (see below), so we test for ZEROP here and do /not/
+ ;; compare with BUFFER-POS
+ (unless (zerop buffer-end)
+ (incf position buffer-end))))
+ (let ((minimum (compute-fill-amount)))
+ (declare (fixnum minimum))
+ (setq buffer (make-octet-buffer minimum))
+ ;; fill buffer for the first time or return immediately if
+ ;; we don't succeed
+ (unless (fill-buffer minimum)
+ (return-from read-sequence* start)))
+ (setq buffer-pos 0)
+ (macrolet ((iterate (set-place)
+ "A very unhygienic macro to implement the
+actual iteration through the sequence including housekeeping for the
+flexi stream. SET-PLACE is the place \(using the index INDEX) used to
+access the sequence."
+ `(flet ((leave ()
+ "This is the function used to
+abort the LOOP iteration below."
+ (when (> index start)
+ (setq last-octet nil
+ last-char-code ,(sublis '((index . (1- index))) set-place)))
+ (return-from read-sequence* index)))
+ (loop
+ (when (>= index end)
+ ;; check if there are octets in the
+ ;; buffer we didn't use - see
+ ;; COMPUTE-FILL-AMOUNT above
+ (let ((rest (- buffer-end buffer-pos)))
+ (when (plusp rest)
+ (or (and can-rewind-p
+ (maybe-rewind stream rest))
+ (loop
+ (when (>= buffer-pos buffer-end)
+ (return))
+ (decf buffer-end)
+ (push (aref (the (array octet *) buffer) buffer-end)
+ octet-stack)))))
+ (leave))
+ (let ((next-char-code
+ (progn (symbol-macrolet
+ ((octet-getter
+ ;; this is the code to retrieve the next octet (or
+ ;; NIL) and to fill the buffer if needed
+ (block next-octet
+ (when (>= buffer-pos buffer-end)
+ (setq buffer-pos 0)
+ (unless (fill-buffer (compute-fill-amount))
+ (return-from next-octet)))
+ (prog1
+ (aref (the (array octet *) buffer) buffer-pos)
+ (incf buffer-pos)))))
+ (macrolet ((unget (form)
+ `(unread-char% ,form flexi-input-stream)))
+ ,',@body)))))
+ (unless next-char-code
+ (leave))
+ (setf ,set-place (code-char next-char-code))
+ (incf index))))))
+ (etypecase sequence
+ (string (iterate (char sequence index)))
+ (array (iterate (aref sequence index)))
+ (list (iterate (nth index sequence)))))))))
+ (defmethod octets-to-string* ((format ,format-class) sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end))
+ (let* ((i start)
+ (string-length (compute-number-of-chars format sequence start end nil))
+ (string (make-array string-length :element-type 'char*)))
+ (declare (fixnum i string-length))
+ (loop for j of-type fixnum from 0 below string-length
+ do (setf (schar string j)
+ (code-char (macrolet ((unget (form)
+ `(decf i (character-length format ,form))))
+ (symbol-macrolet ((octet-getter (and (< i end)
+ (prog1
+ (aref sequence i)
+ (incf i)))))
+ ,@body))))
+ finally (return string)))))))
+
+(defmacro define-char-decoders ((lf-format-class cr-format-class crlf-format-class) &body body)
+ "Non-hygienic utility macro which defines several decoding-related
+methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
+CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
+encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
+similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
+BODY is a code template for the code to read octets and return one
+character. BODY must contain a symbol OCTET-GETTER representing the
+form which is used to obtain the next octet."
+ `(progn
+ (defmethod octets-to-char-code ((format ,lf-format-class) reader)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (function reader))
+ (symbol-macrolet ((octet-getter (funcall reader)))
+ ,@(sublis '((char-decoder . octets-to-char-code))
+ body)))
+ (define-sequence-readers (,lf-format-class) ,@body)
+ (define-sequence-readers (,cr-format-class)
+ ,(with-unique-names (char-code)
+ `(let ((,char-code (progn ,@body)))
+ (case ,char-code
+ (#.+cr+ #.(char-code #\Newline))
+ (otherwise ,char-code)))))
+ (define-sequence-readers (,crlf-format-class)
+ ,(with-unique-names (char-code next-char-code get-char-code)
+ `(flet ((,get-char-code () ,@body))
+ (let ((,char-code (,get-char-code)))
+ (case ,char-code
+ (#.+cr+
+ (let ((,next-char-code (,get-char-code)))
+ (case ,next-char-code
+ (#.+lf+ #.(char-code #\Newline))
+ ;; we saw a CR but no LF afterwards, but then the data
+ ;; ended, so we just return #\Return
+ ((nil) +cr+)
+ ;; if the character we peeked at wasn't a
+ ;; linefeed character we unread its constituents
+ (otherwise (unget (code-char ,next-char-code))
+ ,char-code))))
+ (otherwise ,char-code))))))))
+
+(define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
+ octet-getter)
+
+(define-char-decoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
+ (when-let (octet octet-getter)
(if (> (the octet octet) 127)
(recover-from-encoding-error format
"No character which corresponds to octet #x~X." octet)
octet)))
-(defmethod octets-to-char-code ((format flexi-8-bit-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
(with-accessors ((decoding-table external-format-decoding-table))
format
- (when-let (octet (funcall reader))
+ (when-let (octet octet-getter)
(let ((char-code (aref (the (simple-array char-code-integer *) decoding-table)
(the octet octet))))
(if (or (null char-code)
@@ -88,19 +279,17 @@
"No character which corresponds to octet #x~X." octet)
char-code)))))
-(defmethod octets-to-char-code ((format flexi-utf-8-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-8 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(let ((octet (read-next-byte)))
(declare (type octet octet))
@@ -113,11 +302,7 @@
(values (logand octet #b00001111) 2))
((= #b11110000 (logand octet #b11111000))
(values (logand octet #b00000111) 3))
- ((= #b11111000 (logand octet #b11111100))
- (values (logand octet #b00000011) 4))
- ((= #b11111100 (logand octet #b11111110))
- (values (logand octet #b00000001) 5))
- (t (return-from octets-to-char-code
+ (t (return-from char-decoder
(recover-from-encoding-error format
"Unexpected value #x~X at start of UTF-8 sequence."
octet))))
@@ -130,24 +315,22 @@
repeat count
for octet of-type octet = (read-next-byte)
unless (= #b10000000 (logand octet #b11000000))
- do (return-from octets-to-char-code
+ do (return-from char-decoder
(recover-from-encoding-error format
"Unexpected value #x~X in UTF-8 sequence." octet))
finally (return result)))))))
-(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-16 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(flet ((read-next-word ()
(+ (the octet (read-next-byte))
@@ -159,7 +342,7 @@
(let ((next-word (read-next-word)))
(declare (type (unsigned-byte 16) next-word))
(unless (<= #xdc00 next-word #xdfff)
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"Unexpected UTF-16 word #x~X following #x~X."
next-word word)))
@@ -168,19 +351,17 @@
#x10000)))
(t word)))))))
-(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-16 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(flet ((read-next-word ()
(+ (ash (the octet (read-next-byte)) 8)
@@ -192,7 +373,7 @@
(let ((next-word (read-next-word)))
(declare (type (unsigned-byte 16) next-word))
(unless (<= #xdc00 next-word #xdfff)
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"Unexpected UTF-16 word #x~X following #x~X."
next-word word)))
@@ -201,37 +382,33 @@
#x10000)))
(t word)))))))
-(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-32 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(loop for count of-type fixnum from 0 to 24 by 8
for octet of-type octet = (read-next-byte)
sum (ash octet count)))))
-(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-32 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(loop for count of-type fixnum from 24 downto 0 by 8
for octet of-type octet = (read-next-byte)
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html (original)
+++ branches/edi/doc/index.html Sun May 25 16:28:25 2008
@@ -996,7 +996,7 @@
<h4><a name="strings" class=none>Strings</a></h4>
-This section collects a few convenience functions for strings conversions:
+This section collects a few convenience functions for strings conversions.
<p><br>[Function]
<br><a class=none name="string-to-octets"><b>string-to-octets</b> <i>string <tt>&key</tt> external-format start end</i> => <i>vector</i></a>
@@ -1009,7 +1009,9 @@
<code><i>start</i></code> and <code><i>end</i></code>
are <code>0</code> and the length of the string. The default
for <code><i>external-format</i></code> is <code>:LATIN1</code>.
-
+<p>
+In spite of the name, <code><i>string</i></code> can be any sequence of characters, but
+the function is optimized for strings.
</blockquote>
<p><br>[Function]
@@ -1023,6 +1025,11 @@
<code><i>start</i></code> and <code><i>end</i></code>
are <code>0</code> and the length of the sequence. The default
for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+This function is optimized for the case
+of <code><i>sequence</i></code> being
+a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>.
+Don't use lists if you are in hurry.
</blockquote>
<p><br>[Function]
@@ -1030,14 +1037,17 @@
<blockquote><br>
-Returns the length of the substring of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
+Returns the length of the subsequence of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
<a href="#octet">octets</a> if encoded using
the <a href="#external-formats">external format</a> designated
by <code><i>external-format</i></code>.
The defaults for
<code><i>start</i></code> and <code><i>end</i></code>
-are <code>0</code> and the length of the string. The default
+are <code>0</code> and the length of <code><i>string</i></code>. The default
for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+In spite of the name, <code><i>string</i></code> can be any sequence of characters, but
+the function is optimized for strings.
</blockquote>
<p><br>[Function]
@@ -1054,6 +1064,11 @@
<code><i>start</i></code> and <code><i>end</i></code>
are <code>0</code> and the length of the sequence. The default
for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+This function is optimized for the case
+of <code><i>sequence</i></code> being
+a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>.
+Don't use lists if you are in hurry.
</blockquote>
<br> <br><h3><a class=none name="position">File positions</a></h3>
@@ -1095,7 +1110,7 @@
his work on making FLEXI-STREAMS faster.
<p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.114 2008/05/25 03:08:01 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp (original)
+++ branches/edi/encode.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.21 2008/05/25 20:26:34 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -47,130 +47,140 @@
(:documentation "A generic function which dispatches on the external
format and does the real work for STRING-TO-OCTETS."))
+(defmethod string-to-octets* :around (format (list list) start end)
+ (declare #.*standard-optimize-settings*)
+ (call-next-method format (coerce list 'string*) start end))
+
(defmacro define-sequence-writers ((format-class) &body body)
- "Utility macro which defines methods for WRITE-SEQUENCE* and
-STRING-TO-OCTET* for the class FORMAT-CLASS. For BODY see the
-docstring of DEFINE-CHAR-ENCODERS."
- `(progn
- (defmethod write-sequence* ((format ,format-class) stream sequence start end)
- (declare #.*standard-optimize-settings*)
- (declare (fixnum start end))
- (with-accessors ((column flexi-stream-column))
- stream
- (let* ((octet-seen-p nil)
- (buffer-pos 0)
- ;; estimate should be good enough...
- (factor (encoding-factor format))
- ;; we don't want arbitrarily large buffer, do we?
- (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
- (buffer (make-octet-buffer buffer-size)))
- (declare (fixnum buffer-pos buffer-size)
- (boolean octet-seen-p)
- (type (array octet *) buffer))
- (macrolet ((octet-writer (form)
- `(write-octet ,form)))
- (labels ((flush-buffer ()
- "Sends all octets in BUFFER to the underlying stream."
- (write-sequence buffer stream :end buffer-pos)
- (setq buffer-pos 0))
- (write-octet (octet)
- "Adds one octet to the buffer and flushes it if necessary."
- (declare (type octet octet))
- (when (>= buffer-pos buffer-size)
- (flush-buffer))
- (setf (aref buffer buffer-pos) octet)
- (incf buffer-pos))
- (write-object (object)
- "Dispatches to WRITE-OCTET or WRITE-CHARACTER
-depending on the type of OBJECT."
- (etypecase object
- (octet (setq octet-seen-p t)
- (write-octet object))
- (character (symbol-macrolet ((char-getter object))
- ,@body)))))
- (macrolet ((iterate (&body output-forms)
- "An unhygienic macro to implement the actual
-iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
-sequence element and put its octet representation into the buffer."
- `(loop for index of-type fixnum from start below end
- do (progn ,@output-forms)
- finally (when (plusp buffer-pos)
- (flush-buffer)))))
- (etypecase sequence
- (string (iterate
- (symbol-macrolet ((char-getter (char sequence index)))
- ,@body)))
- (array (iterate
- (symbol-macrolet ((char-getter (aref sequence index)))
- ,@body)))
- (list (iterate (write-object (nth index sequence))))))
- ;; update the column slot, setting it to NIL if we sent
- ;; octets
- (setq column
- (cond (octet-seen-p nil)
- (t (let ((last-newline-pos (position #\Newline sequence
- :test #'char=
- :start start
- :end end
- :from-end t)))
- (cond (last-newline-pos (- end last-newline-pos 1))
- (column (+ column (- end start)))))))))))))
- (defmethod string-to-octets* ((format ,format-class) string start end)
- (declare #.*standard-optimize-settings*)
- (declare (fixnum start end) (string string))
- (let ((octets (make-array (compute-number-of-octets format string start end)
- :element-type 'octet))
- (j 0))
- (declare (fixnum j))
- (loop for i of-type fixnum from start below end do
- (macrolet ((octet-writer (form)
- `(progn
- (setf (aref (the (array octet *) octets) j) ,form)
- (incf j))))
- (symbol-macrolet ((char-getter (char string i)))
- (progn ,@body))))
- octets))))
-
-;; char-getter can be called more than once - no side effects
-(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body)
- "Utility macro which defines several encoding-related methods for
-the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where
-it is assumed that CR-FORMAT-CLASS is the same encoding as
-FORMAT-CLASS but with CR line endings and similar for
-CRLF-FORMAT-CLASS. BODY is a code template for the code to convert
-one character to octets. BODY must contain a symbol CHAR-GETTER
-representing the form which is used to obtain the character and a
-forms like \(OCTET-WRITE <thing>) to write the octet <thing>. The
-CHAR-GETTER form might be called more than once."
+ "Non-hygienic utility macro which defines methods for
+WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS. For
+BODY see the docstring of DEFINE-CHAR-ENCODERS."
(let ((body `((locally
(declare #.*fixnum-optimize-settings*)
,@body))))
`(progn
- (defmethod char-to-octets ((format ,format-class) char writer)
- (declare (character char) (function writer))
- (symbol-macrolet ((char-getter char))
- (macrolet ((octet-writer (form)
- `(funcall writer ,form)))
- ,@body)))
- (define-sequence-writers (,format-class) ,@body)
- (define-sequence-writers (,cr-format-class)
- ,@(sublis `((char-getter . ,(with-unique-names (char)
- `(let ((,char char-getter))
- (declare (character ,char))
- (if (char= ,char #\Newline)
- #\Return
- ,char)))))
- body))
- (define-sequence-writers (,crlf-format-class)
- ,(with-unique-names (char write-char)
- `(flet ((,write-char (,char)
- ,@(sublis `((char-getter . ,char)) body)))
- (let ((,char char-getter))
- (declare (character ,char))
- (cond ((char= ,char #\Newline)
- (,write-char #\Return)
- (,write-char #\Newline))
- (t (,write-char ,char))))))))))
+ (defmethod string-to-octets* ((format ,format-class) string start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end) (string string))
+ (let ((octets (make-array (compute-number-of-octets format string start end)
+ :element-type 'octet))
+ (j 0))
+ (declare (fixnum j))
+ (loop for i of-type fixnum from start below end do
+ (macrolet ((octet-writer (form)
+ `(progn
+ (setf (aref (the (array octet *) octets) j) ,form)
+ (incf j))))
+ (symbol-macrolet ((char-getter (char string i)))
+ (progn ,@body))))
+ octets))
+ (defmethod write-sequence* ((format ,format-class) stream sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end))
+ (with-accessors ((column flexi-stream-column))
+ stream
+ (let* ((octet-seen-p nil)
+ (buffer-pos 0)
+ ;; estimate should be good enough...
+ (factor (encoding-factor format))
+ ;; we don't want arbitrarily large buffer, do we?
+ (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
+ (buffer (make-octet-buffer buffer-size)))
+ (declare (fixnum buffer-pos buffer-size)
+ (boolean octet-seen-p)
+ (type (array octet *) buffer))
+ (macrolet ((octet-writer (form)
+ `(write-octet ,form)))
+ (labels ((flush-buffer ()
+ "Sends all octets in BUFFER to the underlying stream."
+ (write-sequence buffer stream :end buffer-pos)
+ (setq buffer-pos 0))
+ (write-octet (octet)
+ "Adds one octet to the buffer and flushes it if necessary."
+ (declare (type octet octet))
+ (when (>= buffer-pos buffer-size)
+ (flush-buffer))
+ (setf (aref buffer buffer-pos) octet)
+ (incf buffer-pos))
+ (write-object (object)
+ "Dispatches to WRITE-OCTET or WRITE-CHARACTER
+depending on the type of OBJECT."
+ (etypecase object
+ (octet (setq octet-seen-p t)
+ (write-octet object))
+ (character (symbol-macrolet ((char-getter object))
+ ,@body)))))
+ (macrolet ((iterate (&body output-forms)
+ "An unhygienic macro to implement the actual
+iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
+sequence element and put its octet representation into the buffer."
+ `(loop for index of-type fixnum from start below end
+ do (progn ,@output-forms)
+ finally (when (plusp buffer-pos)
+ (flush-buffer)))))
+ (etypecase sequence
+ (string (iterate
+ (symbol-macrolet ((char-getter (char sequence index)))
+ ,@body)))
+ (array (iterate
+ (symbol-macrolet ((char-getter (aref sequence index)))
+ ,@body)))
+ (list (iterate (write-object (nth index sequence))))))
+ ;; update the column slot, setting it to NIL if we sent
+ ;; octets
+ (setq column
+ (cond (octet-seen-p nil)
+ (t (let ((last-newline-pos (position #\Newline sequence
+ :test #'char=
+ :start start
+ :end end
+ :from-end t)))
+ (cond (last-newline-pos (- end last-newline-pos 1))
+ (column (+ column (- end start))))))))))))))))
+
+(defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body)
+ "Non-hygienic utility macro which defines several encoding-related
+methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
+CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
+encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
+similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
+BODY is a code template for the code to convert one character to
+octets. BODY must contain a symbol CHAR-GETTER representing the form
+which is used to obtain the character and a forms like \(OCTET-WRITE
+<thing>) to write the octet <thing>. The CHAR-GETTER form might be
+called more than once."
+ `(progn
+ (defmethod char-to-octets ((format ,lf-format-class) char writer)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (character char) (function writer))
+ (symbol-macrolet ((char-getter char))
+ (macrolet ((octet-writer (form)
+ `(funcall writer ,form)))
+ ,@body)))
+ (define-sequence-writers (,lf-format-class) ,@body)
+ (define-sequence-writers (,cr-format-class)
+ ;; modify the body so that the getter replaces a #\Newline
+ ;; with a #\Return
+ ,@(sublis `((char-getter . ,(with-unique-names (char)
+ `(let ((,char char-getter))
+ (declare (character ,char))
+ (if (char= ,char #\Newline)
+ #\Return
+ ,char)))))
+ body))
+ (define-sequence-writers (,crlf-format-class)
+ ;; modify the body so that we potentially write octets for
+ ;; two characters (#\Return and #\Linefeed) - the original
+ ;; body is wrapped with the WRITE-CHAR local function
+ ,(with-unique-names (char write-char)
+ `(flet ((,write-char (,char)
+ ,@(sublis `((char-getter . ,char)) body)))
+ (let ((,char char-getter))
+ (declare (character ,char))
+ (cond ((char= ,char #\Newline)
+ (,write-char #\Return)
+ (,write-char #\Linefeed))
+ (t (,write-char ,char)))))))))
(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
(let ((octet (char-code char-getter)))
Modified: branches/edi/input.lisp
==============================================================================
--- branches/edi/input.lisp (original)
+++ branches/edi/input.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.77 2008/05/25 03:34:55 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.78 2008/05/25 19:25:44 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -201,9 +201,7 @@
others - see for example FLEXI-STREAMS-TEST::SEQUENCE-TEST."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
- (with-accessors ((position flexi-stream-position)
- (bound flexi-stream-bound)
- (octet-stack flexi-stream-octet-stack)
+ (with-accessors ((octet-stack flexi-stream-octet-stack)
(external-format flexi-stream-external-format)
(last-octet flexi-stream-last-octet)
(last-char-code flexi-stream-last-char-code)
@@ -233,116 +231,8 @@
(setq last-char-code nil
last-octet (elt sequence (1- index))))
(return-from stream-read-sequence index)))
- (let* (buffer
- (buffer-pos 0)
- (buffer-end 0)
- (index start)
- ;; whether we will later be able to rewind the stream if
- ;; needed (to get rid of unused octets in the buffer)
- (can-rewind-p (maybe-rewind stream 0))
- (factor (encoding-factor external-format))
- (integer-factor (floor factor))
- ;; it's an interesting question whether it makes sense
- ;; performance-wise to make RESERVE significantly bigger
- ;; (and thus put potentially a lot more octets into
- ;; OCTET-STACK), especially for UTF-8
- (reserve (cond ((not (floatp factor)) 0)
- ((not can-rewind-p) (* 2 integer-factor))
- (t (ceiling (* (- factor integer-factor) (- end start)))))))
- (declare (fixnum buffer-pos buffer-end index integer-factor reserve)
- (boolean can-rewind-p))
- (flet ((compute-fill-amount ()
- "Computes the amount of octets we can savely read into
-the buffer without violating the stream's bound \(if there is one) and
-without potentially reading much more than we need \(unless we can
-rewind afterwards)."
- (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor
- (the fixnum (- end index))))
- reserve))
- +buffer-size+)))
- (cond (bound (min minimum (- bound position)))
- (t minimum))))
- (fill-buffer (end)
- "Tries to fill the buffer from BUFFER-POS to END and
-returns NIL if the buffer doesn't contain any new data."
- ;; put data from octet stack into buffer if there is any
- (loop
- (when (>= buffer-pos end)
- (return))
- (let ((next-octet (pop octet-stack)))
- (cond (next-octet
- (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet))
- (incf buffer-pos))
- (t (return)))))
- (setq buffer-end (read-sequence buffer stream
- :start buffer-pos
- :end end))
- ;; BUFFER-POS is only greater than zero if the buffer
- ;; already contains unread data from the octet stack
- ;; (see below), so we test for ZEROP here and do /not/
- ;; compare with BUFFER-POS
- (unless (zerop buffer-end)
- (incf position buffer-end))))
- (let ((minimum (compute-fill-amount)))
- (declare (fixnum minimum))
- (setq buffer (make-octet-buffer minimum))
- ;; fill buffer for the first time or return immediately if
- ;; we don't succeed
- (unless (fill-buffer minimum)
- (return-from stream-read-sequence start)))
- (setq buffer-pos 0)
- (flet ((next-octet ()
- "Returns the next octet from the buffer and fills it
-if it is exhausted. Returns NIL if there's no more data on the
-stream."
- (when (>= buffer-pos buffer-end)
- (setq buffer-pos 0)
- (unless (fill-buffer (compute-fill-amount))
- (return-from next-octet)))
- (prog1
- (aref (the (array octet *) buffer) buffer-pos)
- (incf buffer-pos)))
- (unreader (char)
- (unread-char% char flexi-input-stream)))
- (declare (dynamic-extent (function next-octet) (function unreader)))
- (let ((*current-unreader* #'unreader))
- (macrolet ((iterate (set-place)
- "A very unhygienic macro to implement the
-actual iteration through the sequence including housekeeping for the
-flexi stream. SET-PLACE is the place \(using the index INDEX) used to
-access the sequence."
- `(flet ((leave ()
- "This is the function used to abort
-the LOOP iteration below."
- (when (> index start)
- (setq last-octet nil
- last-char-code ,(sublis '((index . (1- index))) set-place)))
- (return-from stream-read-sequence index)))
- (loop
- (when (>= index end)
- ;; check if there are octets in the
- ;; buffer we didn't use - see
- ;; COMPUTE-FILL-AMOUNT above
- (let ((rest (- buffer-end buffer-pos)))
- (when (plusp rest)
- (or (and can-rewind-p
- (maybe-rewind stream rest))
- (loop
- (when (>= buffer-pos buffer-end)
- (return))
- (decf buffer-end)
- (push (aref (the (array octet *) buffer) buffer-end)
- octet-stack)))))
- (leave))
- (let ((next-char-code (octets-to-char-code external-format #'next-octet)))
- (unless next-char-code
- (leave))
- (setf ,set-place (code-char next-char-code))
- (incf index))))))
- (etypecase sequence
- (string (iterate (char sequence index)))
- (array (iterate (aref sequence index)))
- (list (iterate (nth index sequence)))))))))))
+ ;; otherwise hand over to the external format to do the work
+ (read-sequence* external-format flexi-input-stream sequence start end)))
(defmethod stream-unread-char ((stream flexi-input-stream) char)
"Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM.
Modified: branches/edi/length.lisp
==============================================================================
--- branches/edi/length.lisp (original)
+++ branches/edi/length.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -115,7 +115,7 @@
;; formats with CRLF line endings have their own specialized methods
;; below
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(declare (ignore warnp))
(let ((i start)
(length (- end start)))
@@ -132,7 +132,7 @@
(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
@@ -152,7 +152,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start)
(last-octet 0))
@@ -175,7 +175,7 @@
(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(declare (ignore sequence))
(when (and warnp (oddp (- end start)))
(signal-encoding-warning format "~A octet~:P cannot be decoded ~
@@ -203,7 +203,7 @@
(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
@@ -222,7 +222,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start)
(last-octet 0))
@@ -248,7 +248,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start)
(last-octet 0))
@@ -290,7 +290,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
@@ -308,7 +308,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
@@ -330,22 +330,26 @@
encode the sequence of characters in SEQUENCE from START to END using
the external format FORMAT."))
-(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+(defmethod compute-number-of-octets :around (format (list list) start end)
+ (declare #.*standard-optimize-settings*)
+ (call-next-method format (coerce list 'string*) start end))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) string start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence))
+ (declare (ignore string))
(- end start))
-(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((< char-code #x80) 1)
((< char-code #x800) 2)
((< char-code #x10000) 3)
@@ -355,16 +359,16 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((= char-code #.(char-code #\Newline)) 2)
((< char-code #x80) 1)
((< char-code #x800) 2)
@@ -375,16 +379,16 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((< char-code #x10000) 2)
(t 4))))
(declare (fixnum char-length) (type char-code-integer char-code))
@@ -392,16 +396,16 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((= char-code #.(char-code #\Newline)) 4)
((< char-code #x10000) 2)
(t 4))))
@@ -410,16 +414,16 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((= char-code #.(char-code #\Newline)) 4)
((< char-code #x10000) 2)
(t 4))))
@@ -428,17 +432,39 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) string start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence))
+ (declare (ignore string))
(* 4 (- end start)))
-(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(+ (call-next-method)
(* (case (external-format-name format)
(:utf-32 4)
(otherwise 1))
- (count #\Newline sequence :start start :end end :test #'char=))))
\ No newline at end of file
+ (count #\Newline string :start start :end end :test #'char=))))
+
+(defgeneric character-length (format char)
+ (declare #.*fixnum-optimize-settings*)
+ (:documentation "Returns the number of octets needed to encode the
+single character CHAR.")
+ (:method (format char)
+ (compute-number-of-octets format (string char) 0 1)))
+
+(defmethod character-length :around ((format flexi-crlf-mixin) (char (eql #\Newline)))
+ (declare #.*fixnum-optimize-settings*)
+ (+ (call-next-method format +cr+)
+ (call-next-method format +lf+)))
+
+(defmethod character-length ((format flexi-8-bit-format) char)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (ignore char))
+ 1)
+
+(defmethod character-length ((format flexi-utf-32-format) char)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (ignore char))
+ 4)
\ No newline at end of file
Modified: branches/edi/mapping.lisp
==============================================================================
--- branches/edi/mapping.lisp (original)
+++ branches/edi/mapping.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.2 2008/05/20 21:15:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.3 2008/05/25 19:07:53 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -39,6 +39,12 @@
#+:lispworks 'lw:simple-char
#-:lispworks 'character)
+(deftype string* ()
+ "Convenience shortcut to paper over the difference between LispWorks
+and the other Lisps."
+ #+:lispworks 'lw:text-string
+ #-:lispworks 'string)
+
(deftype char-code-integer ()
"The subtype of integers which can be returned by the function CHAR-CODE."
'(integer 0 #.(1- char-code-limit)))
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp (original)
+++ branches/edi/strings.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.29 2008/05/25 03:34:55 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -34,7 +34,10 @@
(start 0) (end (length string)))
"Converts the Lisp string STRING from START to END to an array of
octets corresponding to the external format designated by
-EXTERNAL-FORMAT."
+EXTERNAL-FORMAT.
+
+In spite of the name, STRING can be any sequence of characters, but
+the function is optimized for strings."
(declare #.*standard-optimize-settings*)
(declare (string string))
(setq external-format (maybe-convert-external-format external-format))
@@ -45,51 +48,22 @@
(external-format :latin1)
(start 0) (end (length sequence)))
"Converts the Lisp sequence SEQUENCE of octets from START to END to
-a string using the external format designated by EXTERNAL-FORMAT."
+a string using the external format designated by EXTERNAL-FORMAT.
+
+This function is optimized for the case of SEQUENCE being a vector.
+Don't use lists if you're in a hurry."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
(setq external-format (maybe-convert-external-format external-format))
- (let* ((i start)
- (reader (etypecase sequence
- ((array octet *)
- (lambda ()
- (and (< i end)
- (prog1
- (aref (the (array octet *) sequence) i)
- (incf i)))))
- ((array * *)
- (lambda ()
- (and (< i end)
- (prog1
- (aref sequence i)
- (incf i)))))
- (list
- (lambda ()
- (and (< i end)
- (prog1
- (nth i sequence)
- (incf i))))))))
- (declare (fixnum i) (dynamic-extent reader))
- (labels ((pseudo-writer (octet)
- (declare (ignore octet))
- (decf i))
- (unreader (char)
- (char-to-octets external-format char #'pseudo-writer)))
- (declare (dynamic-extent (function pseudo-writer) (function unreader)))
- (let ((*current-unreader* #'unreader))
- (flet ((next-char ()
- (code-char (octets-to-char-code external-format reader))))
- (declare (inline next-char))
- (let* ((string-length (compute-number-of-chars external-format sequence start end nil))
- (string (make-array string-length :element-type 'char*)))
- (declare (fixnum string-length))
- (loop for j of-type fixnum from 0 below string-length
- do (setf (schar string j) (next-char))
- finally (return string))))))))
+ ;; the external format knows how to do it...
+ (octets-to-string* external-format sequence start end))
(defun octet-length (string &key (external-format :latin1) (start 0) (end (length string)))
"Returns the length of the substring of STRING from START to END in
-octets if encoded using the external format EXTERNAL-FORMAT."
+octets if encoded using the external format EXTERNAL-FORMAT.
+
+In spite of the name, STRING can be any sequence of characters, but
+the function is optimized for strings."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end) (string string))
(setq external-format (maybe-convert-external-format external-format))
@@ -98,7 +72,10 @@
(defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence)))
"Kind of the inverse of OCTET-LENGTH. Returns the length of the
subsequence \(of octets) of SEQUENCE from START to END in characters
-if decoded using the external format EXTERNAL-FORMAT."
+if decoded using the external format EXTERNAL-FORMAT.
+
+This function is optimized for the case of SEQUENCE being a vector.
+Don't use lists if you're in a hurry."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
(setq external-format (maybe-convert-external-format external-format))
1
0
Author: eweitz
Date: Sun May 25 08:26:47 2008
New Revision: 57
Added:
branches/edi/length.lisp (contents, props changed)
Modified:
branches/edi/decode.lisp
branches/edi/encode.lisp
branches/edi/external-format.lisp
branches/edi/flexi-streams.asd
Log:
Re-org
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.20 2008/05/25 03:25:30 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,256 +29,6 @@
(in-package :flexi-streams)
-(defgeneric compute-number-of-chars (format sequence start end warnp)
- (declare #.*standard-optimize-settings*)
- (:documentation "Computes the exact number of characters required to
-decode the sequence of octets in SEQUENCE from START to END using the
-external format FORMAT. If WARNP is NIL, warnings will be muffled."))
-
-(defmethod compute-number-of-chars :around (format (list list) start end warnp)
- (declare #.*standard-optimize-settings*)
- (call-next-method format (coerce list 'vector) start end warnp))
-
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence warnp))
- (- end start))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
- ;; this method only applies to the 8-bit formats as all other
- ;; formats with CRLF line endings have their own specialized methods
- ;; below
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore warnp))
- (let ((i start)
- (length (- end start)))
- (declare (fixnum i length))
- (loop
- (when (>= i end)
- (return))
- (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
- (unless position
- (return))
- (setq i (1+ position))
- (decf length)))
- length))
-
-(defgeneric check-end (format start end i warnp)
- (declare #.*fixnum-optimize-settings*)
- (:method (format start end i warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end i))
- (when (and warnp (> i end))
- (signal-encoding-warning format "These ~A octet~:P can't be ~
-decoded using ~A as the sequence is too short. ~A octet~:P missing ~
-at then end."
- (- end start)
- (external-format-name format)
- (- i end))))
- (:method ((format flexi-utf-16-format) start end i warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end i))
- (declare (ignore i warnp))
- ;; don't warn twice
- (when (evenp (- end start))
- (call-next-method))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((octet (aref sequence i))
- (length (cond ((not (logbitp 7 octet)) 1)
- ((= #b11000000 (logand octet #b11100000)) 2)
- ((= #b11100000 (logand octet #b11110000)) 3)
- (t 4))))
- (declare (fixnum length) (type octet octet))
- (incf sum)
- (incf i length)))
- (check-end format start end i warnp)
- sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start)
- (last-octet 0))
- (declare (fixnum i sum) (type octet last-octet))
- (loop
- (when (>= i end)
- (return))
- (let* ((octet (aref sequence i))
- (length (cond ((not (logbitp 7 octet)) 1)
- ((= #b11000000 (logand octet #b11100000)) 2)
- ((= #b11100000 (logand octet #b11110000)) 3)
- (t 4))))
- (declare (fixnum length) (type octet octet))
- (unless (and (= octet +lf+) (= last-octet +cr+))
- (incf sum))
- (incf i length)
- (setq last-octet octet)))
- (check-end format start end i warnp)
- sum))
-
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence))
- (when (and warnp (oddp (- end start)))
- (signal-encoding-warning format "~A octet~:P cannot be decoded ~
-using UTF-16 as ~:*~A is not even."
- (- end start))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (decf end 2)
- (loop
- (when (> i end)
- (return))
- (let* ((high-octet (aref sequence (1+ i)))
- (length (cond ((<= #xd8 high-octet #xdf) 4)
- (t 2))))
- (declare (fixnum length) (type octet high-octet))
- (incf sum)
- (incf i length)))
- (check-end format start (+ end 2) i warnp)
- sum))
-
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (decf end 2)
- (loop
- (when (> i end)
- (return))
- (let* ((high-octet (aref sequence i))
- (length (cond ((<= #xd8 high-octet #xdf) 4)
- (t 2))))
- (declare (fixnum length) (type octet high-octet))
- (incf sum)
- (incf i length)))
- (check-end format start (+ end 2) i warnp)
- sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start)
- (last-octet 0))
- (declare (fixnum i sum) (type octet last-octet))
- (decf end 2)
- (loop
- (when (> i end)
- (return))
- (let* ((high-octet (aref sequence (1+ i)))
- (length (cond ((<= #xd8 high-octet #xdf) 4)
- (t 2))))
- (declare (fixnum length) (type octet high-octet))
- (unless (and (zerop high-octet)
- (= (the octet (aref sequence i)) +lf+)
- (= last-octet +cr+))
- (incf sum))
- (setq last-octet (if (zerop high-octet)
- (aref sequence i)
- 0))
- (incf i length)))
- (check-end format start (+ end 2) i warnp)
- sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start)
- (last-octet 0))
- (declare (fixnum i sum) (type octet last-octet))
- (decf end 2)
- (loop
- (when (> i end)
- (return))
- (let* ((high-octet (aref sequence i))
- (length (cond ((<= #xd8 high-octet #xdf) 4)
- (t 2))))
- (declare (fixnum length) (type octet high-octet))
- (unless (and (zerop high-octet)
- (= (the octet (aref sequence (1+ i))) +lf+)
- (= last-octet +cr+))
- (incf sum))
- (setq last-octet (if (zerop high-octet)
- (aref sequence (1+ i))
- 0))
- (incf i length)))
- (check-end format start (+ end 2) i warnp)
- sum))
-
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence))
- (let ((length (- end start)))
- (when (and warnp (plusp (mod length 4)))
- (signal-encoding-warning format "~A octet~:P cannot be decoded ~
-using UTF-32 as ~:*~A is not a multiple-value of four."
- length))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence warnp))
- (ceiling (- end start) 4))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore warnp))
- (let ((i start)
- (length (ceiling (- end start) 4)))
- (decf end 8)
- (loop
- (when (> i end)
- (return))
- (cond ((loop for j of-type fixnum from i
- for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
- always (= octet (aref sequence j)))
- (decf length)
- (incf i 8))
- (t (incf i 4))))
- length))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore warnp))
- (let ((i start)
- (length (ceiling (- end start) 4)))
- (decf end 8)
- (loop
- (when (> i end)
- (return))
- (cond ((loop for j of-type fixnum from i
- for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
- always (= octet (aref sequence j)))
- (decf length)
- (incf i 8))
- (t (incf i 4))))
- length))
-
(defun recover-from-encoding-error (external-format format-control &rest format-args)
"Helper function used by OCTETS-TO-CHAR-CODE below to deal with
encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and returns
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp (original)
+++ branches/edi/encode.lisp Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.17 2008/05/25 03:25:30 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,125 +29,6 @@
(in-package :flexi-streams)
-(defgeneric compute-number-of-octets (format sequence start end)
- (declare #.*standard-optimize-settings*)
- (:documentation "Computes the exact number of octets required to
-encode the sequence of characters in SEQUENCE from START to END using
-the external format FORMAT."))
-
-(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence))
- (- end start))
-
-(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((< char-code #x80) 1)
- ((< char-code #x800) 2)
- ((< char-code #x10000) 3)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
- ((< char-code #x80) 1)
- ((< char-code #x800) 2)
- ((< char-code #x10000) 3)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((< char-code #x10000) 2)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
- ((< char-code #x10000) 2)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
- ((< char-code #x10000) 2)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence))
- (* 4 (- end start)))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (+ (call-next-method)
- (* (case (external-format-name format)
- (:utf-32 4)
- (otherwise 1))
- (count #\Newline sequence :start start :end end :test #'char=))))
-
(defgeneric char-to-octets (format char writer)
(declare #.*standard-optimize-settings*)
(:documentation "Converts the character CHAR to a sequence of octets
Modified: branches/edi/external-format.lisp
==============================================================================
--- branches/edi/external-format.lisp (original)
+++ branches/edi/external-format.lisp Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.22 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -387,46 +387,3 @@
NORMALIZE-EXTERNAL-FORMAT."
(print-unreadable-object (object stream :type t :identity t)
(prin1 (normalize-external-format object) stream)))
-
-(defgeneric encoding-factor (format)
- (:documentation "Given an external format FORMAT, returns a factor
-which denotes the octets to characters ratio to expect when
-encoding/decoding. If the returned value is an integer, the factor is
-assumed to be exact. If it is a \(double) float, the factor is
-supposed to be based on heuristics and usually not exact.
-
-This factor is used in string.lisp.")
- (declare #.*standard-optimize-settings*))
-
-(defmethod encoding-factor ((format flexi-8-bit-format))
- (declare #.*standard-optimize-settings*)
- ;; 8-bit encodings map octets to characters in an exact one-to-one
- ;; fashion
- 1)
-
-(defmethod encoding-factor ((format flexi-utf-8-format))
- (declare #.*standard-optimize-settings*)
- ;; UTF-8 characters can be anything from one to six octets, but we
- ;; assume that the "overhead" is only about 5 percent - this
- ;; estimate is obviously very much dependant on the content
- 1.05d0)
-
-(defmethod encoding-factor ((format flexi-utf-16-format))
- (declare #.*standard-optimize-settings*)
- ;; usually one character maps to two octets, but characters with
- ;; code points above #x10000 map to four octets - we assume that we
- ;; usually don't see these characters but of course have to return a
- ;; float
- 2.0d0)
-
-(defmethod encoding-factor ((format flexi-utf-32-format))
- (declare #.*standard-optimize-settings*)
- ;; UTF-32 always matches every character to four octets
- 4)
-
-(defmethod encoding-factor ((format flexi-crlf-mixin))
- (declare #.*standard-optimize-settings*)
- ;; if the sequence #\Return #\Linefeed is the line-end marker, this
- ;; obviously makes encodings potentially longer and definitely makes
- ;; the estimate unexact
- (* 1.02d0 (call-next-method)))
Modified: branches/edi/flexi-streams.asd
==============================================================================
--- branches/edi/flexi-streams.asd (original)
+++ branches/edi/flexi-streams.asd Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.69 2008/05/23 14:56:46 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -47,6 +47,7 @@
(:file "util")
(:file "conditions")
(:file "external-format")
+ (:file "length")
(:file "encode")
(:file "decode")
(:file "in-memory")
Added: branches/edi/length.lisp
==============================================================================
--- (empty file)
+++ branches/edi/length.lisp Sun May 25 08:26:47 2008
@@ -0,0 +1,444 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $
+
+;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;; * Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+
+;;; * Redistributions in binary form must reproduce the above
+;;; copyright notice, this list of conditions and the following
+;;; disclaimer in the documentation and/or other materials
+;;; provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :flexi-streams)
+
+(defgeneric encoding-factor (format)
+ (:documentation "Given an external format FORMAT, returns a factor
+which denotes the octets to characters ratio to expect when
+encoding/decoding. If the returned value is an integer, the factor is
+assumed to be exact. If it is a \(double) float, the factor is
+supposed to be based on heuristics and usually not exact.
+
+This factor is used in string.lisp.")
+ (declare #.*standard-optimize-settings*))
+
+(defmethod encoding-factor ((format flexi-8-bit-format))
+ (declare #.*standard-optimize-settings*)
+ ;; 8-bit encodings map octets to characters in an exact one-to-one
+ ;; fashion
+ 1)
+
+(defmethod encoding-factor ((format flexi-utf-8-format))
+ (declare #.*standard-optimize-settings*)
+ ;; UTF-8 characters can be anything from one to six octets, but we
+ ;; assume that the "overhead" is only about 5 percent - this
+ ;; estimate is obviously very much dependant on the content
+ 1.05d0)
+
+(defmethod encoding-factor ((format flexi-utf-16-format))
+ (declare #.*standard-optimize-settings*)
+ ;; usually one character maps to two octets, but characters with
+ ;; code points above #x10000 map to four octets - we assume that we
+ ;; usually don't see these characters but of course have to return a
+ ;; float
+ 2.0d0)
+
+(defmethod encoding-factor ((format flexi-utf-32-format))
+ (declare #.*standard-optimize-settings*)
+ ;; UTF-32 always matches every character to four octets
+ 4)
+
+(defmethod encoding-factor ((format flexi-crlf-mixin))
+ (declare #.*standard-optimize-settings*)
+ ;; if the sequence #\Return #\Linefeed is the line-end marker, this
+ ;; obviously makes encodings potentially longer and definitely makes
+ ;; the estimate unexact
+ (* 1.02d0 (call-next-method)))
+
+(defgeneric check-end (format start end i warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (:documentation "Helper function used below to determine if we tried
+to read past the end of the sequence.")
+ (:method (format start end i warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end i))
+ (when (and warnp (> i end))
+ (signal-encoding-warning format "These ~A octet~:P can't be ~
+decoded using ~A as the sequence is too short. ~A octet~:P missing ~
+at then end."
+ (- end start)
+ (external-format-name format)
+ (- i end))))
+ (:method ((format flexi-utf-16-format) start end i warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end i))
+ (declare (ignore i warnp))
+ ;; don't warn twice
+ (when (evenp (- end start))
+ (call-next-method))))
+
+(defgeneric compute-number-of-chars (format sequence start end warnp)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "Computes the exact number of characters required to
+decode the sequence of octets in SEQUENCE from START to END using the
+external format FORMAT. If WARNP is NIL, warnings will be muffled."))
+
+(defmethod compute-number-of-chars :around (format (list list) start end warnp)
+ (declare #.*standard-optimize-settings*)
+ (call-next-method format (coerce list 'vector) start end warnp))
+
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence warnp))
+ (- end start))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
+ ;; this method only applies to the 8-bit formats as all other
+ ;; formats with CRLF line endings have their own specialized methods
+ ;; below
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore warnp))
+ (let ((i start)
+ (length (- end start)))
+ (declare (fixnum i length))
+ (loop
+ (when (>= i end)
+ (return))
+ (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
+ (unless position
+ (return))
+ (setq i (1+ position))
+ (decf length)))
+ length))
+
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((octet (aref sequence i))
+ (length (cond ((not (logbitp 7 octet)) 1)
+ ((= #b11000000 (logand octet #b11100000)) 2)
+ ((= #b11100000 (logand octet #b11110000)) 3)
+ (t 4))))
+ (declare (fixnum length) (type octet octet))
+ (incf sum)
+ (incf i length)))
+ (check-end format start end i warnp)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start)
+ (last-octet 0))
+ (declare (fixnum i sum) (type octet last-octet))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((octet (aref sequence i))
+ (length (cond ((not (logbitp 7 octet)) 1)
+ ((= #b11000000 (logand octet #b11100000)) 2)
+ ((= #b11100000 (logand octet #b11110000)) 3)
+ (t 4))))
+ (declare (fixnum length) (type octet octet))
+ (unless (and (= octet +lf+) (= last-octet +cr+))
+ (incf sum))
+ (incf i length)
+ (setq last-octet octet)))
+ (check-end format start end i warnp)
+ sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (when (and warnp (oddp (- end start)))
+ (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-16 as ~:*~A is not even."
+ (- end start))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (decf end 2)
+ (loop
+ (when (> i end)
+ (return))
+ (let* ((high-octet (aref sequence (1+ i)))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (type octet high-octet))
+ (incf sum)
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (decf end 2)
+ (loop
+ (when (> i end)
+ (return))
+ (let* ((high-octet (aref sequence i))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (type octet high-octet))
+ (incf sum)
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start)
+ (last-octet 0))
+ (declare (fixnum i sum) (type octet last-octet))
+ (decf end 2)
+ (loop
+ (when (> i end)
+ (return))
+ (let* ((high-octet (aref sequence (1+ i)))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (type octet high-octet))
+ (unless (and (zerop high-octet)
+ (= (the octet (aref sequence i)) +lf+)
+ (= last-octet +cr+))
+ (incf sum))
+ (setq last-octet (if (zerop high-octet)
+ (aref sequence i)
+ 0))
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start)
+ (last-octet 0))
+ (declare (fixnum i sum) (type octet last-octet))
+ (decf end 2)
+ (loop
+ (when (> i end)
+ (return))
+ (let* ((high-octet (aref sequence i))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (type octet high-octet))
+ (unless (and (zerop high-octet)
+ (= (the octet (aref sequence (1+ i))) +lf+)
+ (= last-octet +cr+))
+ (incf sum))
+ (setq last-octet (if (zerop high-octet)
+ (aref sequence (1+ i))
+ 0))
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
+ sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (let ((length (- end start)))
+ (when (and warnp (plusp (mod length 4)))
+ (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-32 as ~:*~A is not a multiple-value of four."
+ length))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence warnp))
+ (ceiling (- end start) 4))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore warnp))
+ (let ((i start)
+ (length (ceiling (- end start) 4)))
+ (decf end 8)
+ (loop
+ (when (> i end)
+ (return))
+ (cond ((loop for j of-type fixnum from i
+ for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
+ always (= octet (aref sequence j)))
+ (decf length)
+ (incf i 8))
+ (t (incf i 4))))
+ length))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore warnp))
+ (let ((i start)
+ (length (ceiling (- end start) 4)))
+ (decf end 8)
+ (loop
+ (when (> i end)
+ (return))
+ (cond ((loop for j of-type fixnum from i
+ for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
+ always (= octet (aref sequence j)))
+ (decf length)
+ (incf i 8))
+ (t (incf i 4))))
+ length))
+
+(defgeneric compute-number-of-octets (format sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "Computes the exact number of octets required to
+encode the sequence of characters in SEQUENCE from START to END using
+the external format FORMAT."))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (- end start))
+
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((< char-code #x80) 1)
+ ((< char-code #x800) 2)
+ ((< char-code #x10000) 3)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
+ ((< char-code #x80) 1)
+ ((< char-code #x800) 2)
+ ((< char-code #x10000) 3)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+ ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+ ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (* 4 (- end start)))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (+ (call-next-method)
+ (* (case (external-format-name format)
+ (:utf-32 4)
+ (otherwise 1))
+ (count #\Newline sequence :start start :end end :test #'char=))))
\ No newline at end of file
1
0
Author: eweitz
Date: Sat May 24 23:35:21 2008
New Revision: 56
Modified:
branches/edi/decode.lisp
branches/edi/encode.lisp
branches/edi/input.lisp
branches/edi/strings.lisp
Log:
Some cosmetic fixes
Passes tests on AllegroCL as well now
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sat May 24 23:35:21 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.19 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.20 2008/05/25 03:25:30 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -42,7 +42,7 @@
(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence))
+ (declare (ignore sequence warnp))
(- end start))
(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
@@ -51,6 +51,7 @@
;; below
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
+ (declare (ignore warnp))
(let ((i start)
(length (- end start)))
(declare (fixnum i length))
@@ -66,8 +67,9 @@
(defgeneric check-end (format start end i warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end i))
(:method (format start end i warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end i))
(when (and warnp (> i end))
(signal-encoding-warning format "These ~A octet~:P can't be ~
decoded using ~A as the sequence is too short. ~A octet~:P missing ~
@@ -76,6 +78,9 @@
(external-format-name format)
(- i end))))
(:method ((format flexi-utf-16-format) start end i warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end i))
+ (declare (ignore i warnp))
;; don't warn twice
(when (evenp (- end start))
(call-next-method))))
@@ -94,7 +99,7 @@
((= #b11000000 (logand octet #b11100000)) 2)
((= #b11100000 (logand octet #b11110000)) 3)
(t 4))))
- (declare (fixnum length) (octet octet))
+ (declare (fixnum length) (type octet octet))
(incf sum)
(incf i length)))
(check-end format start end i warnp)
@@ -106,7 +111,7 @@
(let ((sum 0)
(i start)
(last-octet 0))
- (declare (fixnum i sum) (octet last-octet))
+ (declare (fixnum i sum) (type octet last-octet))
(loop
(when (>= i end)
(return))
@@ -115,7 +120,7 @@
((= #b11000000 (logand octet #b11100000)) 2)
((= #b11100000 (logand octet #b11110000)) 3)
(t 4))))
- (declare (fixnum length) (octet octet))
+ (declare (fixnum length) (type octet octet))
(unless (and (= octet +lf+) (= last-octet +cr+))
(incf sum))
(incf i length)
@@ -126,6 +131,7 @@
(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
+ (declare (ignore sequence))
(when (and warnp (oddp (- end start)))
(signal-encoding-warning format "~A octet~:P cannot be decoded ~
using UTF-16 as ~:*~A is not even."
@@ -144,7 +150,7 @@
(let* ((high-octet (aref sequence (1+ i)))
(length (cond ((<= #xd8 high-octet #xdf) 4)
(t 2))))
- (declare (fixnum length) (octet high-octet))
+ (declare (fixnum length) (type octet high-octet))
(incf sum)
(incf i length)))
(check-end format start (+ end 2) i warnp)
@@ -163,7 +169,7 @@
(let* ((high-octet (aref sequence i))
(length (cond ((<= #xd8 high-octet #xdf) 4)
(t 2))))
- (declare (fixnum length) (octet high-octet))
+ (declare (fixnum length) (type octet high-octet))
(incf sum)
(incf i length)))
(check-end format start (+ end 2) i warnp)
@@ -175,7 +181,7 @@
(let ((sum 0)
(i start)
(last-octet 0))
- (declare (fixnum i sum) (octet last-octet))
+ (declare (fixnum i sum) (type octet last-octet))
(decf end 2)
(loop
(when (> i end)
@@ -183,7 +189,7 @@
(let* ((high-octet (aref sequence (1+ i)))
(length (cond ((<= #xd8 high-octet #xdf) 4)
(t 2))))
- (declare (fixnum length) (octet high-octet))
+ (declare (fixnum length) (type octet high-octet))
(unless (and (zerop high-octet)
(= (the octet (aref sequence i)) +lf+)
(= last-octet +cr+))
@@ -201,7 +207,7 @@
(let ((sum 0)
(i start)
(last-octet 0))
- (declare (fixnum i sum) (octet last-octet))
+ (declare (fixnum i sum) (type octet last-octet))
(decf end 2)
(loop
(when (> i end)
@@ -209,7 +215,7 @@
(let* ((high-octet (aref sequence i))
(length (cond ((<= #xd8 high-octet #xdf) 4)
(t 2))))
- (declare (fixnum length) (octet high-octet))
+ (declare (fixnum length) (type octet high-octet))
(unless (and (zerop high-octet)
(= (the octet (aref sequence (1+ i))) +lf+)
(= last-octet +cr+))
@@ -224,6 +230,7 @@
(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
+ (declare (ignore sequence))
(let ((length (- end start)))
(when (and warnp (plusp (mod length 4)))
(signal-encoding-warning format "~A octet~:P cannot be decoded ~
@@ -233,12 +240,13 @@
(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence))
+ (declare (ignore sequence warnp))
(ceiling (- end start) 4))
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
+ (declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
(decf end 8)
@@ -256,6 +264,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
+ (declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
(decf end 8)
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp (original)
+++ branches/edi/encode.lisp Sat May 24 23:35:21 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.16 2008/05/24 23:27:23 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.17 2008/05/25 03:25:30 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -55,7 +55,7 @@
((< char-code #x800) 2)
((< char-code #x10000) 3)
(t 4))))
- (declare (fixnum char-length) (char-code-integer char-code))
+ (declare (fixnum char-length) (type char-code-integer char-code))
(incf sum char-length)
(incf i)))
sum))
@@ -75,7 +75,7 @@
((< char-code #x800) 2)
((< char-code #x10000) 3)
(t 4))))
- (declare (fixnum char-length) (char-code-integer char-code))
+ (declare (fixnum char-length) (type char-code-integer char-code))
(incf sum char-length)
(incf i)))
sum))
@@ -92,7 +92,7 @@
(let* ((char-code (char-code (aref sequence i)))
(char-length (cond ((< char-code #x10000) 2)
(t 4))))
- (declare (fixnum char-length) (char-code-integer char-code))
+ (declare (fixnum char-length) (type char-code-integer char-code))
(incf sum char-length)
(incf i)))
sum))
@@ -110,7 +110,7 @@
(char-length (cond ((= char-code #.(char-code #\Newline)) 4)
((< char-code #x10000) 2)
(t 4))))
- (declare (fixnum char-length) (char-code-integer char-code))
+ (declare (fixnum char-length) (type char-code-integer char-code))
(incf sum char-length)
(incf i)))
sum))
@@ -128,7 +128,7 @@
(char-length (cond ((= char-code #.(char-code #\Newline)) 4)
((< char-code #x10000) 2)
(t 4))))
- (declare (fixnum char-length) (char-code-integer char-code))
+ (declare (fixnum char-length) (type char-code-integer char-code))
(incf sum char-length)
(incf i)))
sum))
@@ -194,7 +194,7 @@
(setq buffer-pos 0))
(write-octet (octet)
"Adds one octet to the buffer and flushes it if necessary."
- (declare (octet octet))
+ (declare (type octet octet))
(when (>= buffer-pos buffer-size)
(flush-buffer))
(setf (aref buffer buffer-pos) octet)
Modified: branches/edi/input.lisp
==============================================================================
--- branches/edi/input.lisp (original)
+++ branches/edi/input.lisp Sat May 24 23:35:21 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.76 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.77 2008/05/25 03:34:55 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -150,8 +150,7 @@
(external-format flexi-stream-external-format))
flexi-input-stream
(let ((counter 0) octets-reversed)
- (declare (integer position)
- (fixnum counter))
+ (declare (fixnum counter))
(flet ((writer (octet)
(incf counter)
(push octet octets-reversed)))
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp (original)
+++ branches/edi/strings.lisp Sat May 24 23:35:21 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.28 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.29 2008/05/25 03:34:55 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -69,7 +69,7 @@
(prog1
(nth i sequence)
(incf i))))))))
- (declare (fixnum i length) (dynamic-extent reader))
+ (declare (fixnum i) (dynamic-extent reader))
(labels ((pseudo-writer (octet)
(declare (ignore octet))
(decf i))
@@ -100,6 +100,6 @@
subsequence \(of octets) of SEQUENCE from START to END in characters
if decoded using the external format EXTERNAL-FORMAT."
(declare #.*standard-optimize-settings*)
- (declare (fixnum start end) (string string))
+ (declare (fixnum start end))
(setq external-format (maybe-convert-external-format external-format))
(compute-number-of-chars external-format sequence start end t))
1
0

25 May '08
Author: eweitz
Date: Sat May 24 23:14:26 2008
New Revision: 55
Modified:
branches/edi/conditions.lisp
branches/edi/decode.lisp
branches/edi/doc/index.html
branches/edi/input.lisp
branches/edi/packages.lisp
branches/edi/strings.lisp
branches/edi/test/test.lisp
branches/edi/util.lisp
Log:
Pre-compute string length
Enhanced condition hierarchy
Passes tests on LW
Modified: branches/edi/conditions.lisp
==============================================================================
--- branches/edi/conditions.lisp (original)
+++ branches/edi/conditions.lisp Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.8 2008/05/25 03:07:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -82,22 +82,32 @@
(:documentation "Errors of this type are signalled if an erroneous
position spec is used in conjunction with FILE-POSITION."))
-(define-condition external-format-error ()
+(define-condition external-format-condition (simple-condition)
((external-format :initarg :external-format
:initform nil
- :reader external-format-error-external-format))
+ :reader external-format-condition-external-format))
+ (:documentation "Superclass for all conditions related to external
+formats."))
+
+(define-condition external-format-error (external-format-condition error)
+ ()
(:documentation "Superclass for all errors related to external
formats."))
-(define-condition external-format-simple-error (external-format-error simple-condition)
+(define-condition external-format-warning (external-format-condition warning)
()
- (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting
-capabilities."))
+ (:documentation "Superclass for all warnings related to external
+formats."))
-(define-condition external-format-encoding-error (external-format-simple-error)
+(define-condition external-format-encoding-error (external-format-error)
()
(:documentation "Errors of this type are signalled if there is an
encoding problem."))
+
+(define-condition external-format-encoding-warning (external-format-warning)
+ ()
+ (:documentation "Warnings of this type are signalled if there is an
+encoding problem."))
(defun signal-encoding-error (external-format format-control &rest format-args)
"Convenience function similar to ERROR to signal conditions of type
@@ -106,3 +116,11 @@
:format-control format-control
:format-arguments format-args
:external-format external-format))
+
+(defun signal-encoding-warning (external-format format-control &rest format-args)
+ "Convenience function similar to WARN to signal conditions of type
+EXTERNAL-FORMAT-ENCODING-WARNING."
+ (warn 'external-format-encoding-warning
+ :format-control format-control
+ :format-arguments format-args
+ :external-format external-format))
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.18 2008/05/25 01:42:50 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.19 2008/05/25 03:07:59 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,23 +29,26 @@
(in-package :flexi-streams)
-(defgeneric compute-number-of-chars (format sequence start end)
+(defgeneric compute-number-of-chars (format sequence start end warnp)
(declare #.*standard-optimize-settings*)
(:documentation "Computes the exact number of characters required to
decode the sequence of octets in SEQUENCE from START to END using the
-external format FORMAT."))
+external format FORMAT. If WARNP is NIL, warnings will be muffled."))
-(defmethod compute-number-of-chars :around (format (list list) start end)
+(defmethod compute-number-of-chars :around (format (list list) start end warnp)
(declare #.*standard-optimize-settings*)
- (call-next-method format (coerce list 'vector) start end))
+ (call-next-method format (coerce list 'vector) start end warnp))
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(declare (ignore sequence))
(- end start))
-(defmethod compute-number-of-chars ((format flexi-crlf-8-bit-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
+ ;; this method only applies to the 8-bit formats as all other
+ ;; formats with CRLF line endings have their own specialized methods
+ ;; below
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((i start)
@@ -61,18 +64,23 @@
(decf length)))
length))
-(defun check-end (format start end i)
+(defgeneric check-end (format start end i warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end i))
- (unless (= i end)
- (signal-encoding-error format "These ~A octet~:P can't be ~
-decoded using ~A as the sequence is too short. ~A octet~:P ~
-missing at then end."
- (- end start)
- (external-format-name format)
- (- i end))))
+ (:method (format start end i warnp)
+ (when (and warnp (> i end))
+ (signal-encoding-warning format "These ~A octet~:P can't be ~
+decoded using ~A as the sequence is too short. ~A octet~:P missing ~
+at then end."
+ (- end start)
+ (external-format-name format)
+ (- i end))))
+ (:method ((format flexi-utf-16-format) start end i warnp)
+ ;; don't warn twice
+ (when (evenp (- end start))
+ (call-next-method))))
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((sum 0)
@@ -89,10 +97,10 @@
(declare (fixnum length) (octet octet))
(incf sum)
(incf i length)))
- (check-end format start end i)
+ (check-end format start end i warnp)
sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((sum 0)
@@ -112,25 +120,26 @@
(incf sum))
(incf i length)
(setq last-octet octet)))
- (check-end format start end i)
+ (check-end format start end i warnp)
sum))
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end)
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (unless (evenp (- end start))
- (signal-encoding-error format "~A octet~:P cannot be decoded using ~
-UTF-16 as ~:*~A is not even."
- (- end start))))
+ (when (and warnp (oddp (- end start)))
+ (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-16 as ~:*~A is not even."
+ (- end start))))
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
+ (decf end 2)
(loop
- (when (>= i end)
+ (when (> i end)
(return))
(let* ((high-octet (aref sequence (1+ i)))
(length (cond ((<= #xd8 high-octet #xdf) 4)
@@ -138,17 +147,18 @@
(declare (fixnum length) (octet high-octet))
(incf sum)
(incf i length)))
- (check-end format start end i)
+ (check-end format start (+ end 2) i warnp)
sum))
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
+ (decf end 2)
(loop
- (when (>= i end)
+ (when (> i end)
(return))
(let* ((high-octet (aref sequence i))
(length (cond ((<= #xd8 high-octet #xdf) 4)
@@ -156,18 +166,19 @@
(declare (fixnum length) (octet high-octet))
(incf sum)
(incf i length)))
- (check-end format start end i)
+ (check-end format start (+ end 2) i warnp)
sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((sum 0)
(i start)
(last-octet 0))
(declare (fixnum i sum) (octet last-octet))
+ (decf end 2)
(loop
- (when (>= i end)
+ (when (> i end)
(return))
(let* ((high-octet (aref sequence (1+ i)))
(length (cond ((<= #xd8 high-octet #xdf) 4)
@@ -175,24 +186,25 @@
(declare (fixnum length) (octet high-octet))
(unless (and (zerop high-octet)
(= (the octet (aref sequence i)) +lf+)
- (= last-octet +cr+))
+ (= last-octet +cr+))
(incf sum))
- (incf i length)
(setq last-octet (if (zerop high-octet)
(aref sequence i)
- 0))))
- (check-end format start end i)
+ 0))
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((sum 0)
(i start)
(last-octet 0))
(declare (fixnum i sum) (octet last-octet))
+ (decf end 2)
(loop
- (when (>= i end)
+ (when (> i end)
(return))
(let* ((high-octet (aref sequence i))
(length (cond ((<= #xd8 high-octet #xdf) 4)
@@ -202,32 +214,33 @@
(= (the octet (aref sequence (1+ i))) +lf+)
(= last-octet +cr+))
(incf sum))
- (incf i length)
(setq last-octet (if (zerop high-octet)
(aref sequence (1+ i))
- 0))))
- (check-end format start end i)
+ 0))
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
sum))
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end)
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((length (- end start)))
- (unless (zerop (mod length 4))
- (signal-encoding-error format "~A octet~:P cannot be decoded using ~
-UTF-32 as ~:*~A is not a multiple-value of four."
- length))))
+ (when (and warnp (plusp (mod length 4)))
+ (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-32 as ~:*~A is not a multiple-value of four."
+ length))))
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(declare (ignore sequence))
- (/ (- end start) 4))
+ (ceiling (- end start) 4))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((i start)
- (length (/ (- end start) 4)))
+ (length (ceiling (- end start) 4)))
(decf end 8)
(loop
(when (> i end)
@@ -240,11 +253,11 @@
(t (incf i 4))))
length))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((i start)
- (length (/ (- end start) 4)))
+ (length (ceiling (- end start) 4)))
(decf end 8)
(loop
(when (> i end)
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html (original)
+++ branches/edi/doc/index.html Sat May 24 23:14:26 2008
@@ -69,7 +69,12 @@
<li><a href="#external-format-equal"><code>external-format-equal</code></a>
<li><a href="#*default-eol-style*"><code>*default-eol-style*</code></a>
<li><a href="#*default-little-endian*"><code>*default-little-endian*</code></a>
+ <li><a href="#external-format-condition"><code>external-format-condition</code></a>
+ <li><a href="#external-format-condition-external-format"><code>external-format-condition-external-format</code></a>
+ <li><a href="#external-format-error"><code>external-format-error</code></a>
+ <li><a href="#external-format-warning"><code>external-format-warning</code></a>
<li><a href="#external-format-encoding-error"><code>external-format-encoding-error</code></a>
+ <li><a href="#*substitution-char*"><code>*substitution-char*</code></a>
</ol>
<li><a href="#flexi-streams">Flexi streams</a>
<ol>
@@ -86,7 +91,6 @@
<li><a href="#flexi-stream-stream"><code>flexi-stream-stream</code></a>
<li><a href="#unread-byte"><code>unread-byte</code></a>
<li><a href="#peek-byte"><code>peek-byte</code></a>
- <li><a href="#*substitution-char*"><code>*substitution-char*</code></a>
<li><a href="#octet"><code>octet</code></a>
<li><a href="#flexi-stream-error"><code>flexi-stream-error</code></a>
<li><a href="#flexi-stream-out-of-sync-error"><code>flexi-stream-out-of-sync-error</code></a>
@@ -526,29 +530,98 @@
</blockquote>
<p><br>[Condition]
-<br><a class=none name="external-format-error"><b>external-format-error</b></a>
+<br><a class=none name="external-format-condition"><b>external-format-condition</b></a>
<blockquote><br>
-All errors related to <a href="#external-formats">external formats</a> are of this type.
-There's a slot for the external format which can be accessed with <a href="#external-format-error-external-format"><code>EXTERNAL-FORMAT-ERROR-EXTERNAL-FORMAT</code></a>.
+All conditions related to <a href="#external-formats">external formats</a> are of this type.
+There's a slot for the external format which can be accessed with <a href="#external-format-condition-external-format"><code>EXTERNAL-FORMAT-CONDITION-EXTERNAL-FORMAT</code></a>.
</blockquote>
<p><br>[Reader]
-<br><a class=none name="external-format-error-external-format"><b>external-format-error-external-format</b> <i>condition</i> => <i>external-format</i></a>
+<br><a class=none name="external-format-condition-external-format"><b>external-format-condition-external-format</b> <i>condition</i> => <i>external-format</i></a>
<blockquote><br> If <code><i>condition</i></code> is of
-type <a href="#external-format-error"><code>EXTERNAL-FORMAT-ERROR</code></a>,
+type <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>,
this function will return the associated external format. Note that
-there are errors which happen during the creation of external formats
-where this method returns <code>NIL</code>.
+there are situation which happen during the creation of external
+formats where this method returns <code>NIL</code>.
+</blockquote>
+
+<p><br>[Condition]
+<br><a class=none name="external-format-warning"><b>external-format-warning</b></a>
+
+<blockquote><br>
+All warnings related to <a href="#external-formats">external formats</a> are of this type.
+This is a subtype of <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>.
+</blockquote>
+
+<p><br>[Condition]
+<br><a class=none name="external-format-error"><b>external-format-error</b></a>
+
+<blockquote><br>
+All errors related to <a href="#external-formats">external formats</a> are of this type.
+This is a subtype of <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>.
</blockquote>
<p><br>[Condition]
<br><a class=none name="external-format-encoding-error"><b>external-format-encoding-error</b></a>
<blockquote><br>
-All errors related to encoding problems with <a href="#flexi-streams">flexi streams</a> are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, <a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm"><code>USE-VALUE</code>
-restart</a> is provided. See also <a href="#*substitution-char*"><code>*SUBSTITUTION-CHAR*</code></a> and example for it. <a href="#external-format-encoding-error"><code>EXTERNAL-FORMAT-ENCODING-ERROR</code></a> is a subtype of <a href="#external-format-error"><code>EXTERNAL-FORMAT-ERROR</code></a>.
+All errors related to encoding problems with <a href="#external-formats">external formats</a> are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, <a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm"><code>USE-VALUE</code>
+restart</a> is provided. See also <a href="#*substitution-char*"><code>*SUBSTITUTION-CHAR*</code></a> and the example for it. <a href="#external-format-encoding-error"><code>EXTERNAL-FORMAT-ENCODING-ERROR</code></a> is a subtype of <a href="#external-format-error"><code>EXTERNAL-FORMAT-ERROR</code></a>.
+</blockquote>
+
+<p><br>[Special variable]
+<br><a class=none name="*substitution-char*"><b>*substitution-char*</b></a>
+
+<blockquote><br>
+If this value is not NIL, it should be a character which is used
+(as if by a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm"><code>USE-VALUE</code> restart</a>) whenever during reading an error of
+type <a href="#external-format-encoding-error"><code>EXTERNAL-FORMAT-ENCODING-ERROR</code></a> would have been signalled otherwise.
+
+<pre>
+CL-USER 1 > (defun foo ()
+ <font color=orange>;; not a valid UTF-8 sequence</font>
+ (<a href="#with-input-from-sequence" class=noborder>with-input-from-sequence</a> (in '(#xe4 #xf6 #xfc))
+ (setq in (<a href="#make-flexi-stream" class=noborder>make-flexi-stream</a> in :external-format :utf8))
+ (read-line in)))
+FOO
+
+CL-USER 2 > (foo)
+
+Error: Unexpected value #xF6 in UTF-8 sequence.
+ 1 (continue) Specify a character to be used instead.
+ 2 (abort) Return to level 0.
+ 3 Return to top loop level 0.
+
+Type :b for backtrace, :c <option number> to proceed, or :? for other options
+
+CL-USER 3 : 1 > :c
+Type a character: x
+
+Error: End of file while in UTF-8 sequence.
+ 1 (continue) Specify a character to be used instead.
+ 2 (abort) Return to level 0.
+ 3 Return to top loop level 0.
+
+Type :b for backtrace, :c <option number> to proceed, or :? for other options
+
+CL-USER 4 : 1 > :c
+Type a character: y
+"xy"
+T
+
+CL-USER 5 > (<a href="http://www.lispworks.com/documentation/HyperSpec/Body/m_handle.htm" class=noborder>handler-bind</a> ((<a href="#external-format-encoding-error" class=noborder>external-format-encoding-error</a> (lambda (condition)
+ (<a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm" class=noborder>use-value</a> #\-))))
+ (foo))
+"--"
+T
+
+CL-USER 6 > (let ((<a href="#*SUBSTITUTION-CHAR*" class=noborder>*substitution-char*</a> #\?))
+ (foo))
+"??"
+T
+</pre>
</blockquote>
<h4><a name="flexi-streams" class=none>Flexi streams</a></h4>
@@ -739,59 +812,6 @@
Note that the parameters aren't in the same order as with <a href="http://www.lispworks.com/documentation/HyperSpec/Body/f_peek_c.htm"><code>PEEK-CHAR</code></a> because it doesn't make much sense to make <code><i>stream</i></code> an optional argument.
</blockquote>
-<p><br>[Special variable]
-<br><a class=none name="*substitution-char*"><b>*substitution-char*</b></a>
-
-<blockquote><br>
-If this value is not NIL, it should be a character which is used
-(as if by a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm"><code>USE-VALUE</code> restart</a>) whenever during reading an error of
-type <a href="#external-format-encoding-error"><code>EXTERNAL-FORMAT-ENCODING-ERROR</code></a> would have been signalled otherwise.
-
-<pre>
-CL-USER 1 > (defun foo ()
- <font color=orange>;; not a valid UTF-8 sequence</font>
- (<a href="#with-input-from-sequence" class=noborder>with-input-from-sequence</a> (in '(#xe4 #xf6 #xfc))
- (setq in (<a href="#make-flexi-stream" class=noborder>make-flexi-stream</a> in :external-format :utf8))
- (read-line in)))
-FOO
-
-CL-USER 2 > (foo)
-
-Error: Unexpected value #xF6 in UTF-8 sequence.
- 1 (continue) Specify a character to be used instead.
- 2 (abort) Return to level 0.
- 3 Return to top loop level 0.
-
-Type :b for backtrace, :c <option number> to proceed, or :? for other options
-
-CL-USER 3 : 1 > :c
-Type a character: x
-
-Error: End of file while in UTF-8 sequence.
- 1 (continue) Specify a character to be used instead.
- 2 (abort) Return to level 0.
- 3 Return to top loop level 0.
-
-Type :b for backtrace, :c <option number> to proceed, or :? for other options
-
-CL-USER 4 : 1 > :c
-Type a character: y
-"xy"
-T
-
-CL-USER 5 > (<a href="http://www.lispworks.com/documentation/HyperSpec/Body/m_handle.htm" class=noborder>handler-bind</a> ((<a href="#external-format-encoding-error" class=noborder>external-format-encoding-error</a> (lambda (condition)
- (<a href="http://www.lispworks.com/documentation/HyperSpec/Body/r_use_va.htm" class=noborder>use-value</a> #\-))))
- (foo))
-"--"
-T
-
-CL-USER 6 > (let ((<a href="#*SUBSTITUTION-CHAR*" class=noborder>*substitution-char*</a> #\?))
- (foo))
-"??"
-T
-</pre>
-</blockquote>
-
<p><br>[Type]
<br><a class=none name="octet"><b>octet</b></a>
@@ -997,7 +1017,7 @@
<blockquote><br> Converts the Lisp
sequence <code><i>sequence</i></code> of <a href="#octet">octets</a>
-from <code><i>start</i></code> to <code><i>end</i></code> to string
+from <code><i>start</i></code> to <code><i>end</i></code> to a string
using the <a href="#external-formats">external format</a> designated
by <code><i>external-format</i></code>. The defaults for
<code><i>start</i></code> and <code><i>end</i></code>
@@ -1075,7 +1095,7 @@
his work on making FLEXI-STREAMS faster.
<p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.112 2008/05/25 01:41:25 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.114 2008/05/25 03:08:01 edi Exp $
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: branches/edi/input.lisp
==============================================================================
--- branches/edi/input.lisp (original)
+++ branches/edi/input.lisp Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.75 2008/05/23 14:43:09 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.76 2008/05/25 03:07:59 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -354,10 +354,10 @@
(with-accessors ((last-char-code flexi-stream-last-char-code))
stream
(unless last-char-code
- (error 'flexi-stream-simple-error
+ (error 'flexi-stream-error
:format-control "No character to unread from this stream \(or external format has changed or last reading operation was binary)."))
(unless (= (char-code char) last-char-code)
- (error 'flexi-stream-simple-error
+ (error 'flexi-stream-error
:format-control "Last character read (~S) was different from ~S."
:format-arguments (list (code-char last-char-code) char)))
(unread-char% char stream)
@@ -374,10 +374,10 @@
(position flexi-stream-position))
flexi-input-stream
(unless last-octet
- (error 'flexi-stream-simple-error
+ (error 'flexi-stream-error
:format-control "No byte to unread from this stream \(or last reading operation read a character)."))
(unless (= byte last-octet)
- (error 'flexi-stream-simple-error
+ (error 'flexi-stream-error
:format-control "Last byte read was different from #x~X."
:format-arguments (list byte)))
(setq last-octet nil)
Modified: branches/edi/packages.lisp
==============================================================================
--- branches/edi/packages.lisp (original)
+++ branches/edi/packages.lisp Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.36 2008/05/25 01:40:54 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.37 2008/05/25 03:07:59 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -41,14 +41,17 @@
:*default-little-endian*
:*substitution-char*
:char-length
+ :external-format-condition
+ :external-format-condition-external-format
:external-format-eol-style
:external-format-error
- :external-format-error-external-format
:external-format-encoding-error
+ :external-format-encoding-warning
:external-format-equal
:external-format-id
:external-format-little-endian
:external-format-name
+ :external-format-warning
:flexi-input-stream
:flexi-output-stream
:flexi-io-stream
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp (original)
+++ branches/edi/strings.lisp Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.26 2008/05/25 01:41:32 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.28 2008/05/25 03:07:59 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -45,13 +45,11 @@
(external-format :latin1)
(start 0) (end (length sequence)))
"Converts the Lisp sequence SEQUENCE of octets from START to END to
-string using the external format designated by EXTERNAL-FORMAT."
+a string using the external format designated by EXTERNAL-FORMAT."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
(setq external-format (maybe-convert-external-format external-format))
- (let* ((factor (encoding-factor external-format))
- (length (- end start))
- (i start)
+ (let* ((i start)
(reader (etypecase sequence
((array octet *)
(lambda ()
@@ -82,37 +80,12 @@
(flet ((next-char ()
(code-char (octets-to-char-code external-format reader))))
(declare (inline next-char))
- (etypecase factor
- (integer
- (let* ((string-length (ceiling length factor))
- (string (make-array string-length
- :element-type 'char*)))
- (declare (fixnum string-length))
- (loop for j of-type fixnum from 0 below string-length
- do (setf (schar string j) (next-char))
- finally (return string))))
- (double-float
- ;; this is a bit clunky but hopefully a bit more efficient than
- ;; using VECTOR-PUSH-EXTEND
- (let* ((string-length (ceiling length (the double-float factor)))
- (string (make-array string-length
- :element-type 'char*
- :fill-pointer t
- :adjustable t))
- (j 0))
- (declare (fixnum j string-length)
- (double-float factor))
- (loop
- (when (>= i end)
- (return))
- (when (>= j string-length)
- (setq factor (/ factor 2.0d0))
- (incf string-length (the fixnum (ceiling (- end i) factor)))
- (adjust-array string string-length :fill-pointer t))
- (setf (char string j) (next-char))
- (incf j))
- (setf (fill-pointer string) j)
- string))))))))
+ (let* ((string-length (compute-number-of-chars external-format sequence start end nil))
+ (string (make-array string-length :element-type 'char*)))
+ (declare (fixnum string-length))
+ (loop for j of-type fixnum from 0 below string-length
+ do (setf (schar string j) (next-char))
+ finally (return string))))))))
(defun octet-length (string &key (external-format :latin1) (start 0) (end (length string)))
"Returns the length of the substring of STRING from START to END in
@@ -129,4 +102,4 @@
(declare #.*standard-optimize-settings*)
(declare (fixnum start end) (string string))
(setq external-format (maybe-convert-external-format external-format))
- (compute-number-of-chars external-format sequence start end))
+ (compute-number-of-chars external-format sequence start end t))
Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp (original)
+++ branches/edi/test/test.lisp Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.32 2008/05/21 17:51:42 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.33 2008/05/25 03:08:02 edi Exp $
;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
@@ -264,8 +264,8 @@
`(handler-case
(unless ,expression
(fail "Expression ~S failed.~%" ',expression))
- (condition (c)
- (fail "Expression ~S failed signaling condition of type ~A: ~A.~%"
+ (error (c)
+ (fail "Expression ~S failed signalling error of type ~A: ~A.~%"
',expression (type-of c) c)))))
(format *error-output* "Test ~S~%" ,test-description)
,@body
@@ -473,10 +473,10 @@
(check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))
;; not a valid UTF-8 sequence
(check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))
- (check (string= "??" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))
+ (check (string= "?" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))
;; UTF-8 can't start neither with #b11111110 nor with #b11111111
(check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8)))
- (check (string= "??" (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
+ (check (string= "?" (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
(let ((*substitution-char* nil))
;; :ASCII doesn't have characters with char codes > 127
(check (string= "abc" (using-values (#\b #\c)
@@ -490,13 +490,13 @@
(read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))))
;; not a valid UTF-8 sequence
(check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))))
- (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))))
+ (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))))
;; UTF-8 can't start neither with #b11111110 nor with #b11111111
(check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8))))
- (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
+ (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
;; only one byte
(check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le))))
- (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16le))))
+ (check (string= "" (read-flexi-line* #(#x01) :utf-16le)))
;; two bytes, but value of resulting word suggests that another word follows
(check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le))))
(check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le))))
@@ -507,7 +507,7 @@
(check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be))))
(check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be))))
(check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be))))
- (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16be))))
+ (check (string= "" (read-flexi-line* #(#x01) :utf-16be)))
(check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be))))
(check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be))))
;; the only case when error is signalled for UTF-32 is at end of file
Modified: branches/edi/util.lisp
==============================================================================
--- branches/edi/util.lisp (original)
+++ branches/edi/util.lisp Sat May 24 23:14:26 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.22 2008/05/25 01:40:54 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.23 2008/05/25 03:07:59 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -115,7 +115,7 @@
(unless (find real-name +name-map+
:test #'eq
:key #'cdr)
- (error 'external-format-simple-error
+ (error 'external-format-error
:format-control "~S is not known to be a name for an external format."
:format-arguments (list name)))
real-name))
1
0
Author: eweitz
Date: Sat May 24 21:43:56 2008
New Revision: 54
Modified:
branches/edi/decode.lisp
branches/edi/doc/index.html
branches/edi/packages.lisp
branches/edi/specials.lisp
branches/edi/strings.lisp
branches/edi/util.lisp
Log:
Compute decoding length
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.16 2008/05/20 23:01:50 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.18 2008/05/25 01:42:50 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,6 +29,234 @@
(in-package :flexi-streams)
+(defgeneric compute-number-of-chars (format sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "Computes the exact number of characters required to
+decode the sequence of octets in SEQUENCE from START to END using the
+external format FORMAT."))
+
+(defmethod compute-number-of-chars :around (format (list list) start end)
+ (declare #.*standard-optimize-settings*)
+ (call-next-method format (coerce list 'vector) start end))
+
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (- end start))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-8-bit-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((i start)
+ (length (- end start)))
+ (declare (fixnum i length))
+ (loop
+ (when (>= i end)
+ (return))
+ (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
+ (unless position
+ (return))
+ (setq i (1+ position))
+ (decf length)))
+ length))
+
+(defun check-end (format start end i)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end i))
+ (unless (= i end)
+ (signal-encoding-error format "These ~A octet~:P can't be ~
+decoded using ~A as the sequence is too short. ~A octet~:P ~
+missing at then end."
+ (- end start)
+ (external-format-name format)
+ (- i end))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((octet (aref sequence i))
+ (length (cond ((not (logbitp 7 octet)) 1)
+ ((= #b11000000 (logand octet #b11100000)) 2)
+ ((= #b11100000 (logand octet #b11110000)) 3)
+ (t 4))))
+ (declare (fixnum length) (octet octet))
+ (incf sum)
+ (incf i length)))
+ (check-end format start end i)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start)
+ (last-octet 0))
+ (declare (fixnum i sum) (octet last-octet))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((octet (aref sequence i))
+ (length (cond ((not (logbitp 7 octet)) 1)
+ ((= #b11000000 (logand octet #b11100000)) 2)
+ ((= #b11100000 (logand octet #b11110000)) 3)
+ (t 4))))
+ (declare (fixnum length) (octet octet))
+ (unless (and (= octet +lf+) (= last-octet +cr+))
+ (incf sum))
+ (incf i length)
+ (setq last-octet octet)))
+ (check-end format start end i)
+ sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (unless (evenp (- end start))
+ (signal-encoding-error format "~A octet~:P cannot be decoded using ~
+UTF-16 as ~:*~A is not even."
+ (- end start))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((high-octet (aref sequence (1+ i)))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (octet high-octet))
+ (incf sum)
+ (incf i length)))
+ (check-end format start end i)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((high-octet (aref sequence i))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (octet high-octet))
+ (incf sum)
+ (incf i length)))
+ (check-end format start end i)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start)
+ (last-octet 0))
+ (declare (fixnum i sum) (octet last-octet))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((high-octet (aref sequence (1+ i)))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (octet high-octet))
+ (unless (and (zerop high-octet)
+ (= (the octet (aref sequence i)) +lf+)
+ (= last-octet +cr+))
+ (incf sum))
+ (incf i length)
+ (setq last-octet (if (zerop high-octet)
+ (aref sequence i)
+ 0))))
+ (check-end format start end i)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start)
+ (last-octet 0))
+ (declare (fixnum i sum) (octet last-octet))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((high-octet (aref sequence i))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (octet high-octet))
+ (unless (and (zerop high-octet)
+ (= (the octet (aref sequence (1+ i))) +lf+)
+ (= last-octet +cr+))
+ (incf sum))
+ (incf i length)
+ (setq last-octet (if (zerop high-octet)
+ (aref sequence (1+ i))
+ 0))))
+ (check-end format start end i)
+ sum))
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((length (- end start)))
+ (unless (zerop (mod length 4))
+ (signal-encoding-error format "~A octet~:P cannot be decoded using ~
+UTF-32 as ~:*~A is not a multiple-value of four."
+ length))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (/ (- end start) 4))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((i start)
+ (length (/ (- end start) 4)))
+ (decf end 8)
+ (loop
+ (when (> i end)
+ (return))
+ (cond ((loop for j of-type fixnum from i
+ for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
+ always (= octet (aref sequence j)))
+ (decf length)
+ (incf i 8))
+ (t (incf i 4))))
+ length))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((i start)
+ (length (/ (- end start) 4)))
+ (decf end 8)
+ (loop
+ (when (> i end)
+ (return))
+ (cond ((loop for j of-type fixnum from i
+ for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
+ always (= octet (aref sequence j)))
+ (decf length)
+ (incf i 8))
+ (t (incf i 4))))
+ length))
+
(defun recover-from-encoding-error (external-format format-control &rest format-args)
"Helper function used by OCTETS-TO-CHAR-CODE below to deal with
encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and returns
@@ -242,7 +470,7 @@
(declare (ignore reader))
(let ((char-code (call-next-method)))
(case char-code
- (#.(char-code #\Return) #.(char-code #\Newline))
+ (#.+cr+ #.(char-code #\Newline))
(otherwise char-code))))
(defmethod octets-to-char-code ((format flexi-crlf-mixin) reader)
@@ -251,13 +479,13 @@
(declare (ignore reader))
(let ((char-code (call-next-method)))
(case char-code
- (#.(char-code #\Return)
+ (#.+cr+
(let ((next-char-code (call-next-method)))
(case next-char-code
- (#.(char-code #\Linefeed) #.(char-code #\Newline))
+ (#.+lf+ #.(char-code #\Newline))
;; we saw a CR but no LF afterwards, but then the data
;; ended, so we just return #\Return
- ((nil) #.(char-code #\Return))
+ ((nil) +cr+)
;; if the character we peeked at wasn't a
;; linefeed character we unread its constituents
(otherwise (funcall *current-unreader* (code-char next-char-code))
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html (original)
+++ branches/edi/doc/index.html Sat May 24 21:43:56 2008
@@ -116,6 +116,7 @@
<li><a href="#string-to-octets"><code>string-to-octets</code></a>
<li><a href="#octets-to-string"><code>octets-to-string</code></a>
<li><a href="#octet-length"><code>octet-length</code></a>
+ <li><a href="#char-length"><code>char-length</code></a>
</ol>
</ol>
<li><a href="#position">File positions</a>
@@ -1005,16 +1006,30 @@
</blockquote>
<p><br>[Function]
-<br><a class=none name="octet-length"><b>octet-length</b> <i>string <tt>&key</tt> external-format start end</i> => <i>length-or-nil</i></a>
+<br><a class=none name="octet-length"><b>octet-length</b> <i>string <tt>&key</tt> external-format start end</i> => <i>length</i></a>
<blockquote><br>
Returns the length of the substring of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
<a href="#octet">octets</a> if encoded using
the <a href="#external-formats">external format</a> designated
-by <code><i>external-format</i></code>. Might return <code>NIL</code>
-if there's no efficient way to compute the length without iterating
-through the whole string.
+by <code><i>external-format</i></code>.
+The defaults for
+<code><i>start</i></code> and <code><i>end</i></code>
+are <code>0</code> and the length of the string. The default
+for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+</blockquote>
+
+<p><br>[Function]
+<br><a class=none name="char-length"><b>char-length</b> <i>sequence <tt>&key</tt> external-format start end</i> => <i>length</i></a>
+
+<blockquote><br>
+
+Kind of the inverse of <a href="#octet-length"><code>OCTET-LENGTH</code></a>.
+Returns the length of the subsequence (of <a href="#octet">octets</a>) of <code><i>sequence</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
+characters if decoded using
+the <a href="#external-formats">external format</a> designated
+by <code><i>external-format</i></code>.
The defaults for
<code><i>start</i></code> and <code><i>end</i></code>
are <code>0</code> and the length of the sequence. The default
@@ -1060,7 +1075,7 @@
his work on making FLEXI-STREAMS faster.
<p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.111 2008/05/23 14:56:47 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.112 2008/05/25 01:41:25 edi Exp $
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: branches/edi/packages.lisp
==============================================================================
--- branches/edi/packages.lisp (original)
+++ branches/edi/packages.lisp Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.35 2008/05/21 01:43:43 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.36 2008/05/25 01:40:54 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -40,6 +40,7 @@
(:export :*default-eol-style*
:*default-little-endian*
:*substitution-char*
+ :char-length
:external-format-eol-style
:external-format-error
:external-format-error-external-format
Modified: branches/edi/specials.lisp
==============================================================================
--- branches/edi/specials.lisp (original)
+++ branches/edi/specials.lisp Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.32 2008/05/20 23:01:51 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.33 2008/05/25 01:40:54 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -49,6 +49,10 @@
"Like *STANDARD-OPTIMIZE-SETTINGS*, but \(on LispWorks) with all
arithmetic being fixnum arithmetic.")
+(defconstant +lf+ (char-code #\Linefeed))
+
+(defconstant +cr+ (char-code #\Return))
+
(defvar *current-unreader* nil
"A unary function which might be called to `unread' a character
\(i.e. the sequence of octets it represents).
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp (original)
+++ branches/edi/strings.lisp Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.24 2008/05/24 23:15:25 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.26 2008/05/25 01:41:32 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -116,12 +116,17 @@
(defun octet-length (string &key (external-format :latin1) (start 0) (end (length string)))
"Returns the length of the substring of STRING from START to END in
-octets if encoded using the external format EXTERNAL-FORMAT. Might
-return NIL if there's no efficient way to compute the length without
-iterating through the whole string."
+octets if encoded using the external format EXTERNAL-FORMAT."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end) (string string))
(setq external-format (maybe-convert-external-format external-format))
- (let ((factor (encoding-factor external-format)))
- (typecase factor
- (fixnum (* factor (- end start))))))
+ (compute-number-of-octets external-format string start end))
+
+(defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence)))
+ "Kind of the inverse of OCTET-LENGTH. Returns the length of the
+subsequence \(of octets) of SEQUENCE from START to END in characters
+if decoded using the external format EXTERNAL-FORMAT."
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end) (string string))
+ (setq external-format (maybe-convert-external-format external-format))
+ (compute-number-of-chars external-format sequence start end))
Modified: branches/edi/util.lisp
==============================================================================
--- branches/edi/util.lisp (original)
+++ branches/edi/util.lisp Sat May 24 21:43:56 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.22 2008/05/25 01:40:54 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -115,7 +115,7 @@
(unless (find real-name +name-map+
:test #'eq
:key #'cdr)
- (error 'external-format-error
+ (error 'external-format-simple-error
:format-control "~S is not known to be a name for an external format."
:format-arguments (list name)))
real-name))
1
0
Author: eweitz
Date: Sat May 24 19:34:51 2008
New Revision: 53
Added:
branches/edi/conditions.lisp
Modified:
branches/edi/encode.lisp
branches/edi/output.lisp
branches/edi/strings.lisp
branches/edi/test/test.lisp
Log:
Faster encoding - passes all tests on LW
Added: branches/edi/conditions.lisp
==============================================================================
--- (empty file)
+++ branches/edi/conditions.lisp Sat May 24 19:34:51 2008
@@ -0,0 +1,108 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $
+
+;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;; * Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+
+;;; * Redistributions in binary form must reproduce the above
+;;; copyright notice, this list of conditions and the following
+;;; disclaimer in the documentation and/or other materials
+;;; provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :flexi-streams)
+
+(define-condition flexi-stream-error (stream-error)
+ ()
+ (:documentation "Superclass for all errors related to flexi
+streams."))
+
+(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition)
+ ()
+ (:documentation "Like FLEXI-STREAM-ERROR but with formatting
+capabilities."))
+
+(define-condition flexi-stream-element-type-error (flexi-stream-error)
+ ((element-type :initarg :element-type
+ :reader flexi-stream-element-type-error-element-type))
+ (:report (lambda (condition stream)
+ (format stream "Element type ~S not allowed."
+ (flexi-stream-element-type-error-element-type condition))))
+ (:documentation "Errors of this type are signalled if the flexi
+stream has a wrong element type."))
+
+(define-condition flexi-stream-out-of-sync-error (flexi-stream-error)
+ ()
+ (:report (lambda (condition stream)
+ (declare (ignore condition))
+ (format stream "Stream out of sync from previous
+lookahead, couldn't rewind.")))
+ (:documentation "This can happen if you're trying to write to an IO
+stream which had prior to that `looked ahead' while reading and now
+can't `rewind' to the octet where you /should/ be."))
+
+(define-condition in-memory-stream-error (stream-error)
+ ()
+ (:documentation "Superclass for all errors related to
+IN-MEMORY streams."))
+
+(define-condition in-memory-stream-simple-error (in-memory-stream-error simple-condition)
+ ()
+ (:documentation "Like IN-MEMORY-STREAM-ERROR but with formatting
+capabilities."))
+
+(define-condition in-memory-stream-closed-error (in-memory-stream-error)
+ ()
+ (:report (lambda (condition stream)
+ (format stream "~S is closed."
+ (stream-error-stream condition))))
+ (:documentation "An error that is signalled when someone is trying
+to read from or write to a closed IN-MEMORY stream."))
+
+(define-condition in-memory-stream-position-spec-error (in-memory-stream-simple-error)
+ ((position-spec :initarg :position-spec
+ :reader in-memory-stream-position-spec-error-position-spec))
+ (:documentation "Errors of this type are signalled if an erroneous
+position spec is used in conjunction with FILE-POSITION."))
+
+(define-condition external-format-error ()
+ ((external-format :initarg :external-format
+ :initform nil
+ :reader external-format-error-external-format))
+ (:documentation "Superclass for all errors related to external
+formats."))
+
+(define-condition external-format-simple-error (external-format-error simple-condition)
+ ()
+ (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting
+capabilities."))
+
+(define-condition external-format-encoding-error (external-format-simple-error)
+ ()
+ (:documentation "Errors of this type are signalled if there is an
+encoding problem."))
+
+(defun signal-encoding-error (external-format format-control &rest format-args)
+ "Convenience function similar to ERROR to signal conditions of type
+EXTERNAL-FORMAT-ENCODING-ERROR."
+ (error 'external-format-encoding-error
+ :format-control format-control
+ :format-arguments format-args
+ :external-format external-format))
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp (original)
+++ branches/edi/encode.lisp Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.12 2008/05/20 23:01:50 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.16 2008/05/24 23:27:23 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,6 +29,125 @@
(in-package :flexi-streams)
+(defgeneric compute-number-of-octets (format sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "Computes the exact number of octets required to
+encode the sequence of characters in SEQUENCE from START to END using
+the external format FORMAT."))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (- end start))
+
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((< char-code #x80) 1)
+ ((< char-code #x800) 2)
+ ((< char-code #x10000) 3)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
+ ((< char-code #x80) 1)
+ ((< char-code #x800) 2)
+ ((< char-code #x10000) 3)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+ ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+ ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (* 4 (- end start)))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (+ (call-next-method)
+ (* (case (external-format-name format)
+ (:utf-32 4)
+ (otherwise 1))
+ (count #\Newline sequence :start start :end end :test #'char=))))
+
(defgeneric char-to-octets (format char writer)
(declare #.*standard-optimize-settings*)
(:documentation "Converts the character CHAR to a sequence of octets
@@ -37,72 +156,188 @@
repeatedly each octet. The return value of this function is
unspecified."))
-(defmethod char-to-octets ((format flexi-latin-1-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((octet (char-code char)))
+(defgeneric write-sequence* (format stream sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "A generic function which dispatches on the external
+format and does the real work for STREAM-WRITE-SEQUENCE."))
+
+(defgeneric string-to-octets* (format string start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "A generic function which dispatches on the external
+format and does the real work for STRING-TO-OCTETS."))
+
+(defmacro define-sequence-writers ((format-class) &body body)
+ "Utility macro which defines methods for WRITE-SEQUENCE* and
+STRING-TO-OCTET* for the class FORMAT-CLASS. For BODY see the
+docstring of DEFINE-CHAR-ENCODERS."
+ `(progn
+ (defmethod write-sequence* ((format ,format-class) stream sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end))
+ (with-accessors ((column flexi-stream-column))
+ stream
+ (let* ((octet-seen-p nil)
+ (buffer-pos 0)
+ ;; estimate should be good enough...
+ (factor (encoding-factor format))
+ ;; we don't want arbitrarily large buffer, do we?
+ (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
+ (buffer (make-octet-buffer buffer-size)))
+ (declare (fixnum buffer-pos buffer-size)
+ (boolean octet-seen-p)
+ (type (array octet *) buffer))
+ (macrolet ((octet-writer (form)
+ `(write-octet ,form)))
+ (labels ((flush-buffer ()
+ "Sends all octets in BUFFER to the underlying stream."
+ (write-sequence buffer stream :end buffer-pos)
+ (setq buffer-pos 0))
+ (write-octet (octet)
+ "Adds one octet to the buffer and flushes it if necessary."
+ (declare (octet octet))
+ (when (>= buffer-pos buffer-size)
+ (flush-buffer))
+ (setf (aref buffer buffer-pos) octet)
+ (incf buffer-pos))
+ (write-object (object)
+ "Dispatches to WRITE-OCTET or WRITE-CHARACTER
+depending on the type of OBJECT."
+ (etypecase object
+ (octet (setq octet-seen-p t)
+ (write-octet object))
+ (character (symbol-macrolet ((char-getter object))
+ ,@body)))))
+ (macrolet ((iterate (&body output-forms)
+ "An unhygienic macro to implement the actual
+iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
+sequence element and put its octet representation into the buffer."
+ `(loop for index of-type fixnum from start below end
+ do (progn ,@output-forms)
+ finally (when (plusp buffer-pos)
+ (flush-buffer)))))
+ (etypecase sequence
+ (string (iterate
+ (symbol-macrolet ((char-getter (char sequence index)))
+ ,@body)))
+ (array (iterate
+ (symbol-macrolet ((char-getter (aref sequence index)))
+ ,@body)))
+ (list (iterate (write-object (nth index sequence))))))
+ ;; update the column slot, setting it to NIL if we sent
+ ;; octets
+ (setq column
+ (cond (octet-seen-p nil)
+ (t (let ((last-newline-pos (position #\Newline sequence
+ :test #'char=
+ :start start
+ :end end
+ :from-end t)))
+ (cond (last-newline-pos (- end last-newline-pos 1))
+ (column (+ column (- end start)))))))))))))
+ (defmethod string-to-octets* ((format ,format-class) string start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end) (string string))
+ (let ((octets (make-array (compute-number-of-octets format string start end)
+ :element-type 'octet))
+ (j 0))
+ (declare (fixnum j))
+ (loop for i of-type fixnum from start below end do
+ (macrolet ((octet-writer (form)
+ `(progn
+ (setf (aref (the (array octet *) octets) j) ,form)
+ (incf j))))
+ (symbol-macrolet ((char-getter (char string i)))
+ (progn ,@body))))
+ octets))))
+
+;; char-getter can be called more than once - no side effects
+(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body)
+ "Utility macro which defines several encoding-related methods for
+the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where
+it is assumed that CR-FORMAT-CLASS is the same encoding as
+FORMAT-CLASS but with CR line endings and similar for
+CRLF-FORMAT-CLASS. BODY is a code template for the code to convert
+one character to octets. BODY must contain a symbol CHAR-GETTER
+representing the form which is used to obtain the character and a
+forms like \(OCTET-WRITE <thing>) to write the octet <thing>. The
+CHAR-GETTER form might be called more than once."
+ (let ((body `((locally
+ (declare #.*fixnum-optimize-settings*)
+ ,@body))))
+ `(progn
+ (defmethod char-to-octets ((format ,format-class) char writer)
+ (declare (character char) (function writer))
+ (symbol-macrolet ((char-getter char))
+ (macrolet ((octet-writer (form)
+ `(funcall writer ,form)))
+ ,@body)))
+ (define-sequence-writers (,format-class) ,@body)
+ (define-sequence-writers (,cr-format-class)
+ ,@(sublis `((char-getter . ,(with-unique-names (char)
+ `(let ((,char char-getter))
+ (declare (character ,char))
+ (if (char= ,char #\Newline)
+ #\Return
+ ,char)))))
+ body))
+ (define-sequence-writers (,crlf-format-class)
+ ,(with-unique-names (char write-char)
+ `(flet ((,write-char (,char)
+ ,@(sublis `((char-getter . ,char)) body)))
+ (let ((,char char-getter))
+ (declare (character ,char))
+ (cond ((char= ,char #\Newline)
+ (,write-char #\Return)
+ (,write-char #\Newline))
+ (t (,write-char ,char))))))))))
+
+(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
+ (let ((octet (char-code char-getter)))
(when (> octet 255)
- (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char octet))
- (funcall writer octet)))
+ (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet))
+ (octet-writer octet)))
-(defmethod char-to-octets ((format flexi-ascii-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((octet (char-code char)))
+(define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
+ (let ((octet (char-code char-getter)))
(when (> octet 127)
- (signal-encoding-error format "~S (code ~A) is not an ASCII character." char octet))
- (funcall writer octet)))
+ (signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet))
+ (octet-writer octet)))
-(defmethod char-to-octets ((format flexi-8-bit-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
+(define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
(with-accessors ((encoding-hash external-format-encoding-hash))
format
- (let ((octet (gethash (char-code char) encoding-hash)))
+ (let ((octet (gethash (char-code char-getter) encoding-hash)))
(unless octet
- (signal-encoding-error format "~S (code ~A) is not in this encoding." char octet))
- (funcall writer octet))))
+ (signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet))
+ (octet-writer octet))))
-(defmethod char-to-octets ((format flexi-utf-8-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((char-code (char-code char)))
+(define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
+ (let ((char-code (char-code char-getter)))
(tagbody
(cond ((< char-code #x80)
- (funcall writer char-code)
+ (octet-writer char-code)
(go zero))
((< char-code #x800)
- (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code)))
+ (octet-writer (logior #b11000000 (ldb (byte 5 6) char-code)))
(go one))
((< char-code #x10000)
- (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code)))
+ (octet-writer (logior #b11100000 (ldb (byte 4 12) char-code)))
(go two))
- ((< char-code #x200000)
- (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code)))
- (go three))
- ((< char-code #x4000000)
- (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code)))
- (go four))
- (t (funcall writer (if (logbitp 30 char-code) #b11111101 #b11111100))))
- (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code)))
- four
- (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code)))
- three
- (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code)))
+ (t
+ (octet-writer (logior #b11110000 (ldb (byte 3 18) char-code)))))
+ (octet-writer (logior #b10000000 (ldb (byte 6 12) char-code)))
two
- (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code)))
+ (octet-writer (logior #b10000000 (ldb (byte 6 6) char-code)))
one
- (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code)))
+ (octet-writer (logior #b10000000 (ldb (byte 6 0) char-code)))
zero)))
-(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
+(define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
(flet ((write-word (word)
- (funcall writer (ldb (byte 8 0) word))
- (funcall writer (ldb (byte 8 8) word))))
+ (octet-writer (ldb (byte 8 0) word))
+ (octet-writer (ldb (byte 8 8) word))))
(declare (inline write-word))
- (let ((char-code (char-code char)))
+ (let ((char-code (char-code char-getter)))
(declare (type char-code-integer char-code))
(cond ((< char-code #x10000)
(write-word char-code))
@@ -110,14 +345,12 @@
(write-word (logior #xd800 (ldb (byte 10 10) char-code)))
(write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
-(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
+(define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
(flet ((write-word (word)
- (funcall writer (ldb (byte 8 8) word))
- (funcall writer (ldb (byte 8 0) word))))
+ (octet-writer (ldb (byte 8 8) word))
+ (octet-writer (ldb (byte 8 0) word))))
(declare (inline write-word))
- (let ((char-code (char-code char)))
+ (let ((char-code (char-code char-getter)))
(declare (type char-code-integer char-code))
(cond ((< char-code #x10000)
(write-word char-code))
@@ -125,23 +358,19 @@
(write-word (logior #xd800 (ldb (byte 10 10) char-code)))
(write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
-(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((char-code (char-code char)))
- (funcall writer (ldb (byte 8 0) char-code))
- (funcall writer (ldb (byte 8 8) char-code))
- (funcall writer (ldb (byte 8 16) char-code))
- (funcall writer (ldb (byte 8 24) char-code))))
-
-(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer)
- (declare #.*fixnum-optimize-settings*)
- (declare (character char) (function writer))
- (let ((char-code (char-code char)))
- (funcall writer (ldb (byte 8 24) char-code))
- (funcall writer (ldb (byte 8 16) char-code))
- (funcall writer (ldb (byte 8 8) char-code))
- (funcall writer (ldb (byte 8 0) char-code))))
+(define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
+ (let ((char-code (char-code char-getter)))
+ (octet-writer (ldb (byte 8 0) char-code))
+ (octet-writer (ldb (byte 8 8) char-code))
+ (octet-writer (ldb (byte 8 16) char-code))
+ (octet-writer (ldb (byte 8 24) char-code))))
+
+(define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
+ (let ((char-code (char-code char-getter)))
+ (octet-writer (ldb (byte 8 24) char-code))
+ (octet-writer (ldb (byte 8 16) char-code))
+ (octet-writer (ldb (byte 8 8) char-code))
+ (octet-writer (ldb (byte 8 0) char-code))))
(defmethod char-to-octets ((format flexi-cr-mixin) char writer)
(declare #.*fixnum-optimize-settings*)
Modified: branches/edi/output.lisp
==============================================================================
--- branches/edi/output.lisp (original)
+++ branches/edi/output.lisp Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.63 2008/05/23 14:43:09 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.65 2008/05/24 23:15:25 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -129,7 +129,7 @@
;; needed for AllegroCL - grrr...
(stream-write-char stream #\Newline))
-(defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key)
+(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key)
"An optimized version which uses a buffer underneath. The function
can accepts characters as well as octets and it decides what to do
based on the element type of the sequence \(if possible) or on the
@@ -141,7 +141,7 @@
(with-accessors ((column flexi-stream-column)
(external-format flexi-stream-external-format)
(stream flexi-stream-stream))
- stream
+ flexi-output-stream
(when (>= start end)
(return-from stream-write-sequence sequence))
(when (and (vectorp sequence)
@@ -151,59 +151,8 @@
(setq column nil)
(return-from stream-write-sequence
(write-sequence sequence stream :start start :end end)))
- (let* ((octet-seen-p nil)
- (buffer-pos 0)
- (factor (encoding-factor external-format))
- (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
- (buffer (make-octet-buffer buffer-size)))
- (declare (fixnum buffer-pos buffer-size)
- (boolean octet-seen-p)
- (type (array octet *) buffer))
- (labels ((flush-buffer ()
- "Sends all octets in BUFFER to the underlying stream."
- (write-sequence buffer stream :end buffer-pos)
- (setq buffer-pos 0))
- (write-octet (octet)
- "Adds one octet to the buffer and flush it if necessary."
- (declare (octet octet))
- (when (>= buffer-pos buffer-size)
- (flush-buffer))
- (setf (aref buffer buffer-pos) octet)
- (incf buffer-pos))
- (write-character (char)
- "Adds the octets representing the character CHAR to the buffer."
- (char-to-octets external-format char #'write-octet))
- (write-object (object)
- "Dispatches to WRITE-OCTET or WRITE-CHARACTER
-depending on the type of OBJECT."
- (etypecase object
- (octet (setq octet-seen-p t)
- (write-octet object))
- (character (write-character object)))))
- (declare (dynamic-extent (function write-octet)))
- (macrolet ((iterate (output-form)
- "An unhygienic macro to implement the actual
-iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
-sequence element and put its octet representation into the buffer."
- `(loop for index of-type fixnum from start below end
- do ,output-form
- finally (when (plusp buffer-pos)
- (flush-buffer)))))
- (etypecase sequence
- (string (iterate (write-character (char sequence index))))
- (array (iterate (write-object (aref sequence index))))
- (list (iterate (write-object (nth index sequence)))))
- ;; update the column slot, setting it to NIL if we sent
- ;; octets
- (setq column
- (cond (octet-seen-p nil)
- (t (let ((last-newline-pos (position #\Newline sequence
- :test #'char=
- :start start
- :end end
- :from-end t)))
- (cond (last-newline-pos (- end last-newline-pos 1))
- (column (+ column (- end start))))))))))))
+ ;; otherwise hand over to the external format to do the work
+ (write-sequence* external-format flexi-output-stream sequence start end))
sequence)
(defmethod stream-write-string ((stream flexi-output-stream) string
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp (original)
+++ branches/edi/strings.lisp Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.22 2008/05/21 01:43:43 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.24 2008/05/24 23:15:25 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -36,56 +36,10 @@
octets corresponding to the external format designated by
EXTERNAL-FORMAT."
(declare #.*standard-optimize-settings*)
- (declare (fixnum start end) (string string))
+ (declare (string string))
(setq external-format (maybe-convert-external-format external-format))
- (let ((factor (encoding-factor external-format))
- (length (- end start)))
- (declare (fixnum length))
- (etypecase factor
- (integer
- (let ((octets (make-array (* factor length) :element-type 'octet))
- (j 0))
- (declare (fixnum j))
- (flet ((writer (octet)
- (declare (octet octet))
- (setf (aref (the (array octet *) octets) j) octet)
- (incf j)))
- (declare (dynamic-extent (function writer)))
- (loop for i of-type fixnum from start below end do
- (char-to-octets external-format
- (char string i)
- #'writer)))
- octets))
- (double-float
- ;; this is a bit clunky but hopefully a bit more efficient than
- ;; using VECTOR-PUSH-EXTEND
- (let* ((octets-length (ceiling (* factor length)))
- (octets (make-array octets-length
- :element-type 'octet
- :fill-pointer t
- :adjustable t))
- (i start)
- (j 0))
- (declare (fixnum i j octets-length)
- (double-float factor))
- (flet ((writer (octet)
- (declare (octet octet))
- (when (>= j octets-length)
- (setq factor (* factor 2.0d0))
- (incf octets-length (the fixnum (ceiling (* factor (- end i)))))
- (adjust-array octets octets-length :fill-pointer t))
- (setf (aref (the (array octet *) octets) j) octet)
- (incf j)))
- (declare (dynamic-extent (function writer)))
- (loop
- (when (>= i end)
- (return))
- (char-to-octets external-format
- (char string i)
- #'writer)
- (incf i))
- (setf (fill-pointer octets) j)
- octets))))))
+ ;; the external format knows how to do it...
+ (string-to-octets* external-format string start end))
(defun octets-to-string (sequence &key
(external-format :latin1)
Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp (original)
+++ branches/edi/test/test.lisp Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.31 2008/05/20 23:01:53 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.32 2008/05/21 17:51:42 edi Exp $
;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
@@ -329,10 +329,11 @@
(loop for i below (length seq1)
always (eql (elt seq1 i) (elt seq2 i)))))
-(defun read-sequence-test (pathspec external-format)
- "Several tests to confirm that READ-SEQUENCE behaves as expected."
- (with-test ((format nil "READ-SEQUENCE tests with format ~S."
- (flex::normalize-external-format external-format)))
+(defun sequence-test (pathspec external-format)
+ "Several tests to confirm that READ-SEQUENCE and WRITE-SEQUENCE
+behave as expected."
+ (with-test ((format nil "Sequence tests with format ~S and file ~A."
+ (flex::normalize-external-format external-format) pathspec))
(let* ((full-path (merge-pathnames pathspec *this-file*))
(file-string (file-as-string full-path external-format))
(string-length (length file-string))
@@ -397,7 +398,33 @@
(check (sequence-equal array (subseq file-string 25 (- string-length 25))))
(check (sequence-equal (loop repeat 25
collect (read-char in))
- (subseq file-string (- string-length 25)))))))))
+ (subseq file-string (- string-length 25))))))
+ (let ((path-out (ensure-directories-exist (merge-pathnames pathspec *tmp-dir*))))
+ (with-open-file (out path-out
+ :direction :output
+ :if-exists :supersede
+ :element-type 'octet)
+ (let ((out (make-flexi-stream out :external-format external-format)))
+ (write-sequence octets out)))
+ (check (file-equal full-path path-out))
+ (with-open-file (out path-out
+ :direction :output
+ :if-exists :supersede
+ :element-type 'octet)
+ (let ((out (make-flexi-stream out :external-format external-format)))
+ (write-sequence file-string out)))
+ (check (file-equal full-path path-out))
+ (with-open-file (out path-out
+ :direction :output
+ :if-exists :supersede
+ :element-type 'octet)
+ (let ((out (make-flexi-stream out :external-format external-format)))
+ (write-sequence file-string out :end 100)
+ (write-sequence octets out
+ :start (length (string-to-octets file-string
+ :external-format external-format
+ :end 100)))))
+ (check (file-equal full-path path-out))))))
(defmacro using-values ((&rest values) &body body)
"Executes BODY and feeds an element from VALUES to the USE-VALUE
@@ -544,7 +571,7 @@
nconc (create-test-combinations file-name symbols t))))
(incf no-tests (length read-sequence-test-args-list))
(dolist (args read-sequence-test-args-list)
- (apply 'read-sequence-test args)))
+ (apply 'sequence-test args)))
(incf no-tests)
(error-handling-test)
(incf no-tests)
1
0
Author: eweitz
Date: Sat May 24 19:29:57 2008
New Revision: 52
Added:
branches/edi/
- copied from r51, trunk/
Log:
More needless optimization
1
0