Author: eweitz Date: Tue May 20 04:03:28 2008 New Revision: 35
Modified: branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/encode.lisp branches/edi/external-format.lisp branches/edi/input.lisp branches/edi/output.lisp branches/edi/strings.lisp branches/edi/test/test.lisp Log: Checkpoint
Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.12 2008/05/19 22:32:56 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.14 2008/05/20 07:51:09 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -69,8 +69,7 @@ (declare #.*standard-optimize-settings*) (declare (function reader)) (when-let (octet (funcall reader)) - (declare (type octet octet)) - (if (> octet 127) + (if (> (the octet octet) 127) (recover-from-encoding-error format "No character which corresponds to octet #x~X." octet) octet))) @@ -81,8 +80,8 @@ (with-accessors ((decoding-table external-format-decoding-table)) format (when-let (octet (funcall reader)) - (declare (type octet octet)) - (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table) octet))) + (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table) + (the octet octet)))) (if (or (null char-code) (= (the char-code-integer char-code) 65533)) (recover-from-encoding-error format
Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Tue May 20 04:03:28 2008 @@ -196,7 +196,9 @@
<p> For more examples see the source code -of <a +of +<a href="http://mr-co.de/projects/cl-rfc2047/">CL-RFC2047</a>, +<a href="http://weitz.de/drakma/">Drakma</a>, <a href="http://weitz.de/chunga/">Chunga</a>, or <a href="http://weitz.de/cl-wbxml/">CL-WBXML</a>. @@ -970,29 +972,25 @@ <blockquote><br>
Converts the Lisp string <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> to an array of -<a href="#octet">octets</a> corresponding to the <a href="#external-formats">external format</a> <code><i>external-format</i></code>. The defaults for +<a href="#octet">octets</a> corresponding to the external format designated by <a href="#external-formats">external format</a> <code><i>external-format</i></code>. The defaults for <code><i>start</i></code> and <code><i>end</i></code> -are <code>0</code> and the length of the -string. The default for <code><i>external-format</i></code> is the -value of -evaluating <code>(<a -href="#make-external-format">MAKE-EXTERNAL-FORMAT</a> :LATIN1)</code> +are <code>0</code> and the length of the string. The default +for <code><i>external-format</i></code> is <code>:LATIN1</code>.
</blockquote>
<p><br>[Function] <br><a class=none name="octets-to-string"><b>octets-to-string</b> <i>sequence <tt>&key</tt> external-format start end</i> => <i>string</i></a>
-<blockquote><br> Converts the Lisp sequence <code><i>sequence</i></code> -of <a href="#octet">octets</a> from <code><i>start</i></code> -to <code><i>end</i></code> to string using -the <a href="#external-formats">external +<blockquote><br> Converts the Lisp +sequence <code><i>sequence</i></code> of <a href="#octet">octets</a> +from <code><i>start</i></code> to <code><i>end</i></code> to string +using the external format designated +by <a href="#external-formats">external format</a> <code><i>external-format</i></code>. The defaults for <code><i>start</i></code> and <code><i>end</i></code> are <code>0</code> and the length of the sequence. The default -for <code><i>external-format</i></code> is the value of -evaluating <code>(<a -href="#make-external-format">MAKE-EXTERNAL-FORMAT</a> :LATIN1)</code> +for <code><i>external-format</i></code> is <code>:LATIN1</code>. </blockquote>
<br> <br><h3><a class=none name="position">File positions</a></h3> @@ -1032,7 +1030,7 @@ numerous patches and additions.
<p> -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.102 2008/05/19 07:57:10 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.104 2008/05/20 06:55:21 edi Exp $ <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.10 2008/05/19 22:32:56 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.11 2008/05/20 08:02:49 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -101,7 +101,7 @@ (flet ((write-word (word) (funcall writer (ldb (byte 8 0) word)) (funcall writer (ldb (byte 8 8) word)))) - (declare (inline read-next-word)) + (declare (inline write-word)) (let ((char-code (char-code char))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) @@ -116,7 +116,7 @@ (flet ((write-word (word) (funcall writer (ldb (byte 8 8) word)) (funcall writer (ldb (byte 8 0) word)))) - (declare (inline read-next-word)) + (declare (inline write-word)) (let ((char-code (char-code char))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000)
Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.19 2008/05/19 11:20:11 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.20 2008/05/20 08:02:50 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -219,7 +219,7 @@ (declare #.*standard-optimize-settings*) (declare (ignore initargs)) (with-accessors ((encoding-hash external-format-encoding-hash) - (decoding-table flexi-stream-decoding-table) + (decoding-table external-format-decoding-table) (name external-format-name) (id external-format-id)) external-format
Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.65 2008/05/19 22:54:10 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.66 2008/05/20 00:37:27 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -196,6 +196,10 @@ (stream-read-char stream)))
(defmethod stream-read-sequence ((flexi-input-stream flexi-input-stream) sequence start end &key) + "An optimized version which uses a buffer underneath. The function +can deliver characters as well as octets and it decides what to do +based on the element type of the sequence (which takes precedence) +and the element type of the stream." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (with-accessors ((position flexi-stream-position) @@ -207,21 +211,31 @@ (element-type flexi-stream-element-type) (stream flexi-stream-stream)) flexi-input-stream - (let ((buffer (make-octet-buffer)) - (buffer-pos 0) - (buffer-end 0) - (index start)) - (declare (fixnum buffer-pos buffer-end index) + (let* ((buffer (make-octet-buffer)) + (buffer-pos 0) + (buffer-end 0) + (index start) + (want-chars-p (or (stringp sequence) + (and (vectorp sequence) + (not (subtypep (array-element-type sequence) 'integer))) + (type-equal element-type 'octet))) + (factor (if want-chars-p (encoding-factor external-format) 1)) + (integer-factor (floor factor)) + ;; it's an interesting question whether it makes sense + ;; performance-wise to make RESERVE significantly bigger + ;; (and thus put potentially a lot more octets into + ;; OCTET-STACK), especially for UTF-8 + (reserve (if (floatp factor) (* 2 integer-factor) 0))) + (declare (fixnum buffer-pos buffer-end index integer-factor reserve) (type (array octet *) buffer)) (flet ((compute-minimum () "Computes the minimum amount of octets we can savely read into the buffer without violating the stream's bound (if there -is one) and without potentially reading more than we need." - ;; this has to be done conservatively, unfortunately - - ;; it is possible that we only fill the buffer in very - ;; small chunks once we're near END (but this is only - ;; relevant for multi-byte encodings, of course) - (let ((minimum (min (the fixnum (- end index)) +buffer-size+))) +is one) and without potentially reading much more than we need." + (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor + (the fixnum (- end index)))) + reserve)) + +buffer-size+))) (cond (bound (min minimum (- bound position))) (t minimum)))) (fill-buffer (end) @@ -286,7 +300,16 @@ ,(sublis '((index . (1- index))) set-place))) (return-from stream-read-sequence index))) (loop - (when (>= index end) (leave)) + (when (>= index end) + ;; check if there are octets in the + ;; buffer we didn't use - see + ;; COMPUTE-MINIMUM above + (loop + (when (>= buffer-pos buffer-end) + (return)) + (decf buffer-end) + (push (aref buffer buffer-end) octet-stack)) + (leave)) (let ((next-thing ,(if octetp '(next-octet) '(octets-to-char-code external-format #'next-octet))))
Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.52 2008/05/19 22:32:56 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.54 2008/05/20 06:15:44 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -129,6 +129,19 @@ ;; needed for AllegroCL - grrr... (stream-write-char stream #\Newline))
+;; TODO: file-position -> octet-stack (and others?) + +;; other way around: function "resync" trying to use File-position? + +;; "resync" independent function to empty octet-stack? +;; (decrement-file-position) => success +;; (resync ... &optional how-much (length octet-stack)) => success + +;; in stream-read-sequence: if file stream, read more into buffer, +;; then resync with file-position? + +;; TODO: interaction between read and write + (defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key) "Writes all elements of the sequence SEQUENCE from START to END to the underlying stream. The elements can be either octets or
Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.19 2008/05/19 22:32:56 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.20 2008/05/20 06:15:38 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -30,10 +30,11 @@ (in-package :flexi-streams)
(defun string-to-octets (string &key - (external-format (make-external-format :latin1)) + (external-format :latin1) (start 0) (end (length string))) "Converts the Lisp string STRING from START to END to an array of -octets corresponding to the external format EXTERNAL-FORMAT." +octets corresponding to the external format designated by +EXTERNAL-FORMAT." (declare #.*standard-optimize-settings*) (declare (fixnum start end) (string string)) (setq external-format (maybe-convert-external-format external-format)) @@ -87,10 +88,10 @@ octets))))))
(defun octets-to-string (sequence &key - (external-format (make-external-format :latin1)) + (external-format :latin1) (start 0) (end (length sequence))) "Converts the Lisp sequence SEQUENCE of octets from START to END to -string using the external format EXTERNAL-FORMAT." +string using the external format designated by EXTERNAL-FORMAT." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format))
Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.28 2008/05/19 23:54:55 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.29 2008/05/20 00:37:30 edi Exp $
;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
@@ -367,19 +367,25 @@ (with-open-file (in full-path :element-type 'octet) (let* ((in (make-flexi-stream in :external-format external-format)) (list (make-list (- string-length 100)))) - (check (sequence-equal (loop repeat 100 + (check (sequence-equal (loop repeat 50 collect (read-char in)) - (subseq file-string 0 100))) + (subseq file-string 0 50))) (read-sequence list in) - (check (sequence-equal list (subseq file-string 100))))) + (check (sequence-equal list (subseq file-string 50 (- string-length 50)))) + (check (sequence-equal (loop repeat 50 + collect (read-char in)) + (subseq file-string (- string-length 50)))))) (with-open-file (in full-path :element-type 'octet) (let* ((in (make-flexi-stream in :external-format external-format)) (array (make-array (- string-length 50)))) - (check (sequence-equal (loop repeat 50 + (check (sequence-equal (loop repeat 25 collect (read-char in)) - (subseq file-string 0 50))) + (subseq file-string 0 25))) (read-sequence array in) - (check (sequence-equal array (subseq file-string 50)))))))) + (check (sequence-equal array (subseq file-string 25 (- string-length 25)))) + (check (sequence-equal (loop repeat 25 + collect (read-char in)) + (subseq file-string (- string-length 25)))))))))
(defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE @@ -514,11 +520,9 @@ (no-tests (* 8 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) - #+(or) (dolist (*copy-function* '(copy-stream copy-stream*)) (dolist (args compare-files-args-list) (apply 'compare-files args))) - #+(or) (let ((string-test-args-list (loop for (file-name symbols) in *test-files* nconc (create-test-combinations file-name symbols t)))) (incf no-tests (length string-test-args-list)) @@ -530,10 +534,8 @@ (dolist (args read-sequence-test-args-list) (apply 'read-sequence-test args))) (incf no-tests) - #+(or) (error-handling-test) (incf no-tests) - #+(or) (unread-char-test) (format *error-output* "~%~%~:[~A of ~A tests failed..~;~*All ~A tests passed~].~%" (= no-tests *test-success-counter*) (- no-tests *test-success-counter*) no-tests)))