Author: eweitz Date: Sun May 25 19:43:22 2008 New Revision: 61
Modified: branches/edi/CHANGELOG branches/edi/conditions.lisp branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/flexi-streams.asd branches/edi/length.lisp branches/edi/packages.lisp branches/edi/strings.lisp branches/edi/test/test.lisp Log: Ready for release
Modified: branches/edi/CHANGELOG ============================================================================== --- branches/edi/CHANGELOG (original) +++ branches/edi/CHANGELOG Sun May 25 19:43:22 2008 @@ -1,3 +1,10 @@ +Version 1.0.0 +2008-05-26 +More redesign for the sake of performance +More checks for invalid data +More tests +Exported functions for length computation + Version 0.15.3 2008-05-23 Avoid CHANGE-CLASS on LispWorks if possible
Modified: branches/edi/conditions.lisp ============================================================================== --- branches/edi/conditions.lisp (original) +++ branches/edi/conditions.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.8 2008/05/25 03:07:58 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.9 2008/05/25 22:23:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -93,21 +93,11 @@ () (:documentation "Superclass for all errors related to external formats.")) - -(define-condition external-format-warning (external-format-condition warning) - () - (:documentation "Superclass for all warnings related to external -formats."))
(define-condition external-format-encoding-error (external-format-error) () (:documentation "Errors of this type are signalled if there is an encoding problem.")) - -(define-condition external-format-encoding-warning (external-format-warning) - () - (:documentation "Warnings of this type are signalled if there is an -encoding problem."))
(defun signal-encoding-error (external-format format-control &rest format-args) "Convenience function similar to ERROR to signal conditions of type @@ -116,11 +106,3 @@ :format-control format-control :format-arguments format-args :external-format external-format)) - -(defun signal-encoding-warning (external-format format-control &rest format-args) - "Convenience function similar to WARN to signal conditions of type -EXTERNAL-FORMAT-ENCODING-WARNING." - (warn 'external-format-encoding-warning - :format-control format-control - :format-arguments format-args - :external-format external-format))
Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.26 2008/05/25 20:44:03 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.29 2008/05/25 23:19:19 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -202,7 +202,7 @@ (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (let* ((i start) - (string-length (compute-number-of-chars format sequence start end nil)) + (string-length (compute-number-of-chars format sequence start end)) (string (make-array string-length :element-type 'char*))) (declare (fixnum i string-length)) (loop for j of-type fixnum from 0 below string-length @@ -223,39 +223,46 @@ encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class. BODY is a code template for the code to read octets and return one -character. BODY must contain a symbol OCTET-GETTER representing the -form which is used to obtain the next octet." - `(progn - (defmethod octets-to-char-code ((format ,lf-format-class) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) - (symbol-macrolet ((octet-getter (funcall reader))) - ,@(sublis '((char-decoder . octets-to-char-code)) - body))) - (define-sequence-readers (,lf-format-class) ,@body) - (define-sequence-readers (,cr-format-class) - ,(with-unique-names (char-code) - `(let ((,char-code (progn ,@body))) - (case ,char-code - (#.+cr+ #.(char-code #\Newline)) - (otherwise ,char-code))))) - (define-sequence-readers (,crlf-format-class) - ,(with-unique-names (char-code next-char-code get-char-code) - `(flet ((,get-char-code () ,@body)) - (let ((,char-code (,get-char-code))) +character code. BODY must contain a symbol OCTET-GETTER representing +the form which is used to obtain the next octet." + (let* ((body (with-unique-names (char-code) + `((let ((,char-code (progn ,@body))) + (when (and ,char-code + (or (<= #xd8 (logand* #x00ff (ash* ,char-code -8)) #xdf) + (> ,char-code #x10ffff))) + (recover-from-encoding-error format "Illegal code point ~A (#x~:*~X)." ,char-code)) + ,char-code))))) + `(progn + (defmethod octets-to-char-code ((format ,lf-format-class) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (symbol-macrolet ((octet-getter (funcall reader))) + ,@(sublis '((char-decoder . octets-to-char-code)) + body))) + (define-sequence-readers (,lf-format-class) ,@body) + (define-sequence-readers (,cr-format-class) + ,(with-unique-names (char-code) + `(let ((,char-code (progn ,@body))) (case ,char-code - (#.+cr+ - (let ((,next-char-code (,get-char-code))) - (case ,next-char-code - (#.+lf+ #.(char-code #\Newline)) - ;; we saw a CR but no LF afterwards, but then the data - ;; ended, so we just return #\Return - ((nil) +cr+) - ;; if the character we peeked at wasn't a - ;; linefeed character we unread its constituents - (otherwise (unget (code-char ,next-char-code)) - ,char-code)))) - (otherwise ,char-code)))))))) + (#.+cr+ #.(char-code #\Newline)) + (otherwise ,char-code))))) + (define-sequence-readers (,crlf-format-class) + ,(with-unique-names (char-code next-char-code get-char-code) + `(flet ((,get-char-code () ,@body)) + (let ((,char-code (,get-char-code))) + (case ,char-code + (#.+cr+ + (let ((,next-char-code (,get-char-code))) + (case ,next-char-code + (#.+lf+ #.(char-code #\Newline)) + ;; we saw a CR but no LF afterwards, but then the data + ;; ended, so we just return #\Return + ((nil) +cr+) + ;; if the character we peeked at wasn't a + ;; linefeed character we unread its constituents + (otherwise (unget (code-char ,next-char-code)) + ,char-code)))) + (otherwise ,char-code)))))))))
(define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format) octet-getter) @@ -296,25 +303,28 @@ (multiple-value-bind (start count) (cond ((not (logbitp 7 octet)) (values octet 0)) - ((= #b11000000 (logand octet #b11100000)) - (values (logand octet #b00011111) 1)) - ((= #b11100000 (logand octet #b11110000)) - (values (logand octet #b00001111) 2)) - ((= #b11110000 (logand octet #b11111000)) - (values (logand octet #b00000111) 3)) + ((= #b11000000 (logand* octet #b11100000)) + (when (= #b11000000 (logand* octet #b11111110)) + (return-from char-decoder + (recover-from-encoding-error format + "Illegal value #x~X leads to `overlong' UTF-8 sequence." + octet))) + (values (logand* octet #b00011111) 1)) + ((= #b11100000 (logand* octet #b11110000)) + (values (logand* octet #b00001111) 2)) + ((= #b11110000 (logand* octet #b11111000)) + (values (logand* octet #b00000111) 3)) (t (return-from char-decoder (recover-from-encoding-error format "Unexpected value #x~X at start of UTF-8 sequence." octet)))) (declare (fixnum count)) - ;; note that we currently don't check for "overlong" - ;; sequences or other illegal values (loop for result of-type code-point - = start then (+ (ash result 6) - (logand octet #b111111)) + = start then (+ (ash* result 6) + (logand* octet #b111111)) repeat count for octet of-type octet = (read-next-byte) - unless (= #b10000000 (logand octet #b11000000)) + unless (= #b10000000 (logand* octet #b11000000)) do (return-from char-decoder (recover-from-encoding-error format "Unexpected value #x~X in UTF-8 sequence." octet)) @@ -334,7 +344,7 @@ (setq first-octet-seen t)))) (flet ((read-next-word () (+ (the octet (read-next-byte)) - (ash (the octet (read-next-byte)) 8)))) + (ash* (the octet (read-next-byte)) 8)))) (declare (inline read-next-word)) (let ((word (read-next-word))) (declare (type (unsigned-byte 16) word)) @@ -346,8 +356,8 @@ (recover-from-encoding-error format "Unexpected UTF-16 word #x~X following #x~X." next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) + (+ (ash* (logand* #b1111111111 word) 10) + (logand* #b1111111111 next-word) #x10000))) (t word)))))))
@@ -364,7 +374,7 @@ (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (flet ((read-next-word () - (+ (ash (the octet (read-next-byte)) 8) + (+ (ash* (the octet (read-next-byte)) 8) (the octet (read-next-byte))))) (declare (inline read-next-word)) (let ((word (read-next-word))) @@ -377,8 +387,8 @@ (recover-from-encoding-error format "Unexpected UTF-16 word #x~X following #x~X." next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) + (+ (ash* (logand* #b1111111111 word) 10) + (logand* #b1111111111 next-word) #x10000))) (t word)))))))
@@ -396,7 +406,7 @@ (setq first-octet-seen t)))) (loop for count of-type fixnum from 0 to 24 by 8 for octet of-type octet = (read-next-byte) - sum (ash octet count))))) + sum (ash* octet count)))))
(define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format) (let (first-octet-seen) @@ -412,7 +422,7 @@ (setq first-octet-seen t)))) (loop for count of-type fixnum from 24 downto 0 by 8 for octet of-type octet = (read-next-byte) - sum (ash octet count))))) + sum (ash* octet count)))))
(defmethod octets-to-char-code ((format flexi-cr-mixin) reader) (declare #.*fixnum-optimize-settings*)
Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Sun May 25 19:43:22 2008 @@ -72,7 +72,6 @@ <li><a href="#external-format-condition"><code>external-format-condition</code></a> <li><a href="#external-format-condition-external-format"><code>external-format-condition-external-format</code></a> <li><a href="#external-format-error"><code>external-format-error</code></a> - <li><a href="#external-format-warning"><code>external-format-warning</code></a> <li><a href="#external-format-encoding-error"><code>external-format-encoding-error</code></a> <li><a href="#*substitution-char*"><code>*substitution-char*</code></a> </ol> @@ -229,7 +228,7 @@ <p> FLEXI-STREAMS together with this documentation can be downloaded from <a href="http://weitz.de/files/flexi-streams.tar.gz">http://weitz.de/files/flexi-streams.tar.gz</a>. The -current version is 0.15.3. +current version is 1.0.0. <p> Before you install FLEXI-STREAMS you first need to install the <a @@ -548,14 +547,6 @@ </blockquote>
<p><br>[Condition] -<br><a class=none name="external-format-warning"><b>external-format-warning</b></a> - -<blockquote><br> -All warnings related to <a href="#external-formats">external formats</a> are of this type. -This is a subtype of <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>. -</blockquote> - -<p><br>[Condition] <br><a class=none name="external-format-error"><b>external-format-error</b></a>
<blockquote><br> @@ -1063,7 +1054,7 @@ The defaults for <code><i>start</i></code> and <code><i>end</i></code> are <code>0</code> and the length of the sequence. The default -for <code><i>external-format</i></code> is <code>:LATIN1</code>. +for <code><i>external-format</i></code> is <code>:LATIN1</code>. Note that this function doesn't check for the validity of the data in <code><i>sequence</i></code>. <p> This function is optimized for the case of <code><i>sequence</i></code> being @@ -1110,7 +1101,7 @@ his work on making FLEXI-STREAMS faster.
<p> -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.119 2008/05/25 23:42:30 edi Exp $ <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.71 2008/05/25 23:42:28 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -35,7 +35,7 @@ (in-package :flexi-streams-system)
(defsystem :flexi-streams - :version "0.15.3" + :version "1.0.0" :serial t :components ((:file "packages") (:file "mapping")
Modified: branches/edi/length.lisp ============================================================================== --- branches/edi/length.lisp (original) +++ branches/edi/length.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.4 2008/05/25 22:23:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -72,51 +72,50 @@ ;; the estimate unexact (* 1.02d0 (call-next-method)))
-(defgeneric check-end (format start end i warnp) +(defgeneric check-end (format start end i) (declare #.*fixnum-optimize-settings*) (:documentation "Helper function used below to determine if we tried to read past the end of the sequence.") - (:method (format start end i warnp) + (:method (format start end i) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end i)) - (when (and warnp (> i end)) - (signal-encoding-warning format "These ~A octet~:P can't be ~ + (when (> i end) + (signal-encoding-error format "These ~A octet~:P can't be ~ decoded using ~A as the sequence is too short. ~A octet~:P missing ~ at then end." - (- end start) - (external-format-name format) - (- i end)))) - (:method ((format flexi-utf-16-format) start end i warnp) + (- end start) + (external-format-name format) + (- i end)))) + (:method ((format flexi-utf-16-format) start end i) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end i)) - (declare (ignore i warnp)) + (declare (ignore i)) ;; don't warn twice (when (evenp (- end start)) (call-next-method))))
-(defgeneric compute-number-of-chars (format sequence start end warnp) +(defgeneric compute-number-of-chars (format sequence start end) (declare #.*standard-optimize-settings*) (:documentation "Computes the exact number of characters required to decode the sequence of octets in SEQUENCE from START to END using the -external format FORMAT. If WARNP is NIL, warnings will be muffled.")) +external format FORMAT."))
-(defmethod compute-number-of-chars :around (format (list list) start end warnp) +(defmethod compute-number-of-chars :around (format (list list) start end) (declare #.*standard-optimize-settings*) - (call-next-method format (coerce list 'vector) start end warnp)) + (call-next-method format (coerce list 'vector) start end))
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence warnp)) + (declare (ignore sequence)) (- end start))
-(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end) ;; this method only applies to the 8-bit formats as all other ;; formats with CRLF line endings have their own specialized methods ;; below (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) - (declare (ignore warnp)) (let ((i start) (length (- end start))) (declare (fixnum i length)) @@ -130,7 +129,7 @@ (decf length))) length))
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -140,17 +139,18 @@ (when (>= i end) (return)) (let* ((octet (aref sequence i)) + ;; note that there are no validity checks here (length (cond ((not (logbitp 7 octet)) 1) - ((= #b11000000 (logand octet #b11100000)) 2) - ((= #b11100000 (logand octet #b11110000)) 3) + ((= #b11000000 (logand* octet #b11100000)) 2) + ((= #b11100000 (logand* octet #b11110000)) 3) (t 4)))) (declare (fixnum length) (type octet octet)) (incf sum) (incf i length))) - (check-end format start end i warnp) + (check-end format start end i) sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -161,28 +161,29 @@ (when (>= i end) (return)) (let* ((octet (aref sequence i)) + ;; note that there are no validity checks here (length (cond ((not (logbitp 7 octet)) 1) - ((= #b11000000 (logand octet #b11100000)) 2) - ((= #b11100000 (logand octet #b11110000)) 3) + ((= #b11000000 (logand* octet #b11100000)) 2) + ((= #b11100000 (logand* octet #b11110000)) 3) (t 4)))) (declare (fixnum length) (type octet octet)) (unless (and (= octet +lf+) (= last-octet +cr+)) (incf sum)) (incf i length) (setq last-octet octet))) - (check-end format start end i warnp) + (check-end format start end i) sum))
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) +(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (declare (ignore sequence)) - (when (and warnp (oddp (- end start))) - (signal-encoding-warning format "~A octet~:P cannot be decoded ~ + (when (oddp (- end start)) + (signal-encoding-error format "~A octet~:P cannot be decoded ~ using UTF-16 as ~:*~A is not even." - (- end start)))) + (- end start))))
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((sum 0) @@ -198,10 +199,10 @@ (declare (fixnum length) (type octet high-octet)) (incf sum) (incf i length))) - (check-end format start (+ end 2) i warnp) + (check-end format start (+ end 2) i) sum))
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -217,10 +218,10 @@ (declare (fixnum length) (type octet high-octet)) (incf sum) (incf i length))) - (check-end format start (+ end 2) i warnp) + (check-end format start (+ end 2) i) sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -243,10 +244,10 @@ (aref sequence i) 0)) (incf i length))) - (check-end format start (+ end 2) i warnp) + (check-end format start (+ end 2) i) sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -269,29 +270,28 @@ (aref sequence (1+ i)) 0)) (incf i length))) - (check-end format start (+ end 2) i warnp) + (check-end format start (+ end 2) i) sum))
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp) +(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (declare (ignore sequence)) (let ((length (- end start))) - (when (and warnp (plusp (mod length 4))) - (signal-encoding-warning format "~A octet~:P cannot be decoded ~ + (when (plusp (mod length 4)) + (signal-encoding-error format "~A octet~:P cannot be decoded ~ using UTF-32 as ~:*~A is not a multiple-value of four." - length)))) + length))))
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence warnp)) + (declare (ignore sequence)) (ceiling (- end start) 4))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) - (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) (decf end 8) @@ -306,10 +306,9 @@ (t (incf i 4)))) length))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) - (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) (decf end 8)
Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.37 2008/05/25 03:07:59 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.38 2008/05/25 22:23:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -46,12 +46,10 @@ :external-format-eol-style :external-format-error :external-format-encoding-error - :external-format-encoding-warning :external-format-equal :external-format-id :external-format-little-endian :external-format-name - :external-format-warning :flexi-input-stream :flexi-output-stream :flexi-io-stream
Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.32 2008/05/25 23:09:13 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -72,11 +72,12 @@ (defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence))) "Kind of the inverse of OCTET-LENGTH. Returns the length of the subsequence (of octets) of SEQUENCE from START to END in characters -if decoded using the external format EXTERNAL-FORMAT. +if decoded using the external format EXTERNAL-FORMAT. Note that this +function doesn't check for the validity of the data in SEQUENCE.
This function is optimized for the case of SEQUENCE being a vector. Don't use lists if you're in a hurry." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format)) - (compute-number-of-chars external-format sequence start end t)) + (compute-number-of-chars external-format sequence start end))
Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.33 2008/05/25 03:08:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.35 2008/05/25 23:10:47 edi Exp $
;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,6 +29,48 @@
(in-package :flexi-streams-test)
+(defmacro with-test ((test-description) &body body) + "Defines a test. Two utilities are available inside of the body of +the maco: The function FAIL, and the macro CHECK. FAIL, the lowest +level utility, marks the test defined by WITH-TEST as failed. CHECK +checks whether its argument is true, otherwise it calls FAIL. If +during evaluation of the specified expression any condition is +signalled, this is also considered a failure. + +WITH-TEST prints reports while the tests run. It also increments +*TEST-SUCCESS-COUNT* if a test completes successfully." + (flex::with-unique-names (successp) + `(let ((,successp t)) + (flet ((fail (format-str &rest format-args) + (setf ,successp nil) + (apply #'format *error-output* format-str format-args))) + (macrolet ((check (expression) + `(handler-case + (unless ,expression + (fail "Expression ~S failed.~%" ',expression)) + (error (c) + (fail "Expression ~S failed signalling error of type ~A: ~A.~%" + ',expression (type-of c) c)))) + (with-expected-error ((condition-type) &body body) + `(handler-case (progn ,@body) + (,condition-type () t) + (:no-error (&rest args) + (declare (ignore args)) + (fail "Expected condition ~S not signalled~%" + ',condition-type))))) + (format *error-output* "Test ~S~%" ,test-description) + ,@body + (if ,successp + (incf *test-success-counter*) + (format *error-output* " Test failed!!!~%")) + (terpri *error-output*) + (terpri *error-output*)) + ,successp)))) + +;; LW can't indent this correctly because it's in a MACROLET +#+:lispworks +(editor:setup-indent "with-expected-error" 1 2 4) + (defconstant +buffer-size+ 8192 "Size of buffers for COPY-STREAM* below.")
@@ -245,37 +287,6 @@ (setf (fill-pointer string) (read-sequence string in)) string)))
-(defmacro with-test ((test-description) &body body) - "Defines a test. Two utilities are available inside of the body of -the maco: The function FAIL, and the macro CHECK. FAIL, the lowest -level utility, marks the test defined by WITH-TEST as failed. CHECK -checks whether its argument is true, otherwise it calls FAIL. If -during evaluation of the specified expression any condition is -signalled, this is also considered a failure. - -WITH-TEST prints reports while the tests run. It also increments -*TEST-SUCCESS-COUNT* if a test completes successfully." - (flex::with-unique-names (successp) - `(let ((,successp t)) - (flet ((fail (format-str &rest format-args) - (setf ,successp nil) - (apply #'format *error-output* format-str format-args))) - (macrolet ((check (expression) - `(handler-case - (unless ,expression - (fail "Expression ~S failed.~%" ',expression)) - (error (c) - (fail "Expression ~S failed signalling error of type ~A: ~A.~%" - ',expression (type-of c) c))))) - (format *error-output* "Test ~S~%" ,test-description) - ,@body - (if ,successp - (incf *test-success-counter*) - (format *error-output* " Test failed!!!~%")) - (terpri *error-output*) - (terpri *error-output*)) - ,successp)))) - (defun old-string-to-octets (string &key (external-format (make-external-format :latin1)) (start 0) end) @@ -460,7 +471,51 @@
(defun error-handling-test () "Tests several possible errors and how they are handled." - (with-test ("Handling of errors.") + (with-test ("Illegal values.") + (macrolet ((want-encoding-error (input format) + `(with-expected-error (external-format-encoding-error) + (read-flexi-line* ,input ,format)))) + ;; "overlong" + (want-encoding-error #(#b11000000) :utf-8) + (want-encoding-error #(#b11000001) :utf-8) + ;; examples of invalid lead octets + (want-encoding-error #(#b11111000) :utf-8) + (want-encoding-error #(#b11111001) :utf-8) + (want-encoding-error #(#b11111100) :utf-8) + (want-encoding-error #(#b11111101) :utf-8) + (want-encoding-error #(#b11111110) :utf-8) + (want-encoding-error #(#b11111111) :utf-8) + ;; illegal code points + (want-encoding-error #(#x00 #x00 #x11 #x00) :utf-32le) + (want-encoding-error #(#x00 #xd8) :utf-16le) + (want-encoding-error #(#xff #xdf) :utf-16le))) + (with-test ("Illegal lengths.") + (macrolet ((want-encoding-error (input format) + `(with-expected-error (external-format-encoding-error) + (read-flexi-line* ,input ,format)))) + ;; UTF-8 sequences which are too short + (want-encoding-error #(#xe4 #xf6 #xfc) :utf8) + (want-encoding-error #(#xc0) :utf8) + (want-encoding-error #(#xe0 #xff) :utf8) + (want-encoding-error #(#xf0 #xff #xff) :utf8) + ;; UTF-16 wants an even number of octets + (want-encoding-error #(#x01) :utf-16le) + (want-encoding-error #(#x01 #x01 #x01) :utf-16le) + (want-encoding-error #(#x01) :utf-16be) + (want-encoding-error #(#x01 #x01 #x01) :utf-16be) + ;; another word should follow but it doesn't + (want-encoding-error #(#x01 #xd8) :utf-16le) + (want-encoding-error #(#xd8 #x01) :utf-16be) + ;; UTF-32 always wants four octets + (want-encoding-error #(#x01) :utf-32le) + (want-encoding-error #(#x01 #x01) :utf-32le) + (want-encoding-error #(#x01 #x01 #x01) :utf-32le) + (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32le) + (want-encoding-error #(#x01) :utf-32be) + (want-encoding-error #(#x01 #x01) :utf-32be) + (want-encoding-error #(#x01 #x01 #x01) :utf-32be) + (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32be))) + (with-test ("Errors while decoding and substitution of characters.") ;; handling of EOF in the middle of CRLF (check (string= #.(string #\Return) (read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf)))) @@ -472,11 +527,7 @@ (check (string= "a??" (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253))) (check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))) ;; not a valid UTF-8 sequence - (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))) - (check (string= "?" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))) - ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 - (check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8))) - (check (string= "?" (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) + (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))) (let ((*substitution-char* nil)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "abc" (using-values (#\b #\c) @@ -490,16 +541,12 @@ (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))) ;; not a valid UTF-8 sequence (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))) - (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))) ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8)))) - (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) ;; only one byte (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le)))) - (check (string= "" (read-flexi-line* #(#x01) :utf-16le))) ;; two bytes, but value of resulting word suggests that another word follows (check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le)))) - (check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le)))) ;; the second word must fit into the [#xdc00; #xdfff] interval, but it is #xdbff (check (string= "T" (using-values (#\T) (read-flexi-line '(#x01 #xd8 #xff #xdb) :utf-16le)))) (check (string= "T" (using-values (#\T) (read-flexi-line* #(#x01 #xd8 #xff #xdb) :utf-16le)))) @@ -507,11 +554,10 @@ (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be)))) (check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be)))) (check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be)))) - (check (string= "" (read-flexi-line* #(#x01) :utf-16be))) - (check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be)))) (check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be)))) - ;; the only case when error is signalled for UTF-32 is at end of file - ;; in the middle of 4-byte sequence, both for big and little endian + ;; the only case when errors are signalled for UTF-32 is at end + ;; of file in the middle of 4-byte sequence, both for big and + ;; little endian (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01) :utf-32le)))) (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32le)))) (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32le)))) @@ -521,17 +567,7 @@ (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32be)))) (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32be)))) (check (string= "aY" (using-values (#\Y) - (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32le)))) - (check (string= "aY" (using-values (#\Y) - (read-flexi-line* `#(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32be)))) - (check (string= "aY" (using-values (#\Y) - (read-flexi-line* `#(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))))) + (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))))))
(defun unread-char-test () "Tests whether UNREAD-CHAR behaves as expected." @@ -572,7 +608,7 @@ (incf no-tests (length read-sequence-test-args-list)) (dolist (args read-sequence-test-args-list) (apply 'sequence-test args))) - (incf no-tests) + (incf no-tests 3) (error-handling-test) (incf no-tests) (unread-char-test)