Author: eweitz Date: Sun May 25 16:28:25 2008 New Revision: 58
Modified: branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/encode.lisp branches/edi/input.lisp branches/edi/length.lisp branches/edi/mapping.lisp branches/edi/strings.lisp Log: Optimized the other direction as well
Passes tests on LispWorks
Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.25 2008/05/25 20:26:34 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -60,26 +60,217 @@ The special variable *CURRENT-UNREADER* must be bound correctly whenever this function is called."))
-(defmethod octets-to-char-code ((format flexi-latin-1-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) - (funcall reader)) +(defgeneric octets-to-string* (format sequence start end) + (declare #.*standard-optimize-settings*) + (:documentation "A generic function which dispatches on the external +format and does the real work for OCTETS-TO-STRING."))
-(defmethod octets-to-char-code ((format flexi-ascii-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) - (when-let (octet (funcall reader)) +(defmethod octets-to-string* :around (format (list list) start end) + (declare #.*standard-optimize-settings*) + (call-next-method format (coerce list 'vector) start end)) + +(defmacro define-sequence-readers ((format-class) &body body) + "Non-hygienic utility macro which defines methods for READ-SEQUENCE* +and OCTETS-TO-STRING* for the class FORMAT-CLASS. BODY is described +in the docstring of DEFINE-CHAR-ENCODERS but can additionally contain +a form (UNGET <form>) which has to be replaced by the correct code to +`unread' the octets for the character designated by <form>." + (let* ((body `((block char-decoder + (locally + (declare #.*fixnum-optimize-settings*) + ,@body))))) + `(progn + (defmethod read-sequence* ((format ,format-class) flexi-input-stream sequence start end) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (last-octet flexi-stream-last-octet) + (last-char-code flexi-stream-last-char-code) + (stream flexi-stream-stream)) + flexi-input-stream + (let* (buffer + (buffer-pos 0) + (buffer-end 0) + (index start) + ;; whether we will later be able to rewind the stream if + ;; needed (to get rid of unused octets in the buffer) + (can-rewind-p (maybe-rewind stream 0)) + (factor (encoding-factor format)) + (integer-factor (floor factor)) + ;; it's an interesting question whether it makes sense + ;; performance-wise to make RESERVE significantly bigger + ;; (and thus put potentially a lot more octets into + ;; OCTET-STACK), especially for UTF-8 + (reserve (cond ((not (floatp factor)) 0) + ((not can-rewind-p) (* 2 integer-factor)) + (t (ceiling (* (- factor integer-factor) (- end start))))))) + (declare (fixnum buffer-pos buffer-end index integer-factor reserve) + (boolean can-rewind-p)) + (flet ((compute-fill-amount () + "Computes the amount of octets we can savely read into +the buffer without violating the stream's bound (if there is one) and +without potentially reading much more than we need (unless we can +rewind afterwards)." + (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor + (the fixnum (- end index)))) + reserve)) + +buffer-size+))) + (cond (bound (min minimum (- bound position))) + (t minimum)))) + (fill-buffer (end) + "Tries to fill the buffer from BUFFER-POS to END and +returns NIL if the buffer doesn't contain any new data." + ;; put data from octet stack into buffer if there is any + (loop + (when (>= buffer-pos end) + (return)) + (let ((next-octet (pop octet-stack))) + (cond (next-octet + (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet)) + (incf buffer-pos)) + (t (return))))) + (setq buffer-end (read-sequence buffer stream + :start buffer-pos + :end end)) + ;; BUFFER-POS is only greater than zero if the buffer + ;; already contains unread data from the octet stack + ;; (see below), so we test for ZEROP here and do /not/ + ;; compare with BUFFER-POS + (unless (zerop buffer-end) + (incf position buffer-end)))) + (let ((minimum (compute-fill-amount))) + (declare (fixnum minimum)) + (setq buffer (make-octet-buffer minimum)) + ;; fill buffer for the first time or return immediately if + ;; we don't succeed + (unless (fill-buffer minimum) + (return-from read-sequence* start))) + (setq buffer-pos 0) + (macrolet ((iterate (set-place) + "A very unhygienic macro to implement the +actual iteration through the sequence including housekeeping for the +flexi stream. SET-PLACE is the place (using the index INDEX) used to +access the sequence." + `(flet ((leave () + "This is the function used to +abort the LOOP iteration below." + (when (> index start) + (setq last-octet nil + last-char-code ,(sublis '((index . (1- index))) set-place))) + (return-from read-sequence* index))) + (loop + (when (>= index end) + ;; check if there are octets in the + ;; buffer we didn't use - see + ;; COMPUTE-FILL-AMOUNT above + (let ((rest (- buffer-end buffer-pos))) + (when (plusp rest) + (or (and can-rewind-p + (maybe-rewind stream rest)) + (loop + (when (>= buffer-pos buffer-end) + (return)) + (decf buffer-end) + (push (aref (the (array octet *) buffer) buffer-end) + octet-stack))))) + (leave)) + (let ((next-char-code + (progn (symbol-macrolet + ((octet-getter + ;; this is the code to retrieve the next octet (or + ;; NIL) and to fill the buffer if needed + (block next-octet + (when (>= buffer-pos buffer-end) + (setq buffer-pos 0) + (unless (fill-buffer (compute-fill-amount)) + (return-from next-octet))) + (prog1 + (aref (the (array octet *) buffer) buffer-pos) + (incf buffer-pos))))) + (macrolet ((unget (form) + `(unread-char% ,form flexi-input-stream))) + ,',@body))))) + (unless next-char-code + (leave)) + (setf ,set-place (code-char next-char-code)) + (incf index)))))) + (etypecase sequence + (string (iterate (char sequence index))) + (array (iterate (aref sequence index))) + (list (iterate (nth index sequence))))))))) + (defmethod octets-to-string* ((format ,format-class) sequence start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (let* ((i start) + (string-length (compute-number-of-chars format sequence start end nil)) + (string (make-array string-length :element-type 'char*))) + (declare (fixnum i string-length)) + (loop for j of-type fixnum from 0 below string-length + do (setf (schar string j) + (code-char (macrolet ((unget (form) + `(decf i (character-length format ,form)))) + (symbol-macrolet ((octet-getter (and (< i end) + (prog1 + (aref sequence i) + (incf i))))) + ,@body)))) + finally (return string))))))) + +(defmacro define-char-decoders ((lf-format-class cr-format-class crlf-format-class) &body body) + "Non-hygienic utility macro which defines several decoding-related +methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and +CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same +encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and +similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class. +BODY is a code template for the code to read octets and return one +character. BODY must contain a symbol OCTET-GETTER representing the +form which is used to obtain the next octet." + `(progn + (defmethod octets-to-char-code ((format ,lf-format-class) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (symbol-macrolet ((octet-getter (funcall reader))) + ,@(sublis '((char-decoder . octets-to-char-code)) + body))) + (define-sequence-readers (,lf-format-class) ,@body) + (define-sequence-readers (,cr-format-class) + ,(with-unique-names (char-code) + `(let ((,char-code (progn ,@body))) + (case ,char-code + (#.+cr+ #.(char-code #\Newline)) + (otherwise ,char-code))))) + (define-sequence-readers (,crlf-format-class) + ,(with-unique-names (char-code next-char-code get-char-code) + `(flet ((,get-char-code () ,@body)) + (let ((,char-code (,get-char-code))) + (case ,char-code + (#.+cr+ + (let ((,next-char-code (,get-char-code))) + (case ,next-char-code + (#.+lf+ #.(char-code #\Newline)) + ;; we saw a CR but no LF afterwards, but then the data + ;; ended, so we just return #\Return + ((nil) +cr+) + ;; if the character we peeked at wasn't a + ;; linefeed character we unread its constituents + (otherwise (unget (code-char ,next-char-code)) + ,char-code)))) + (otherwise ,char-code)))))))) + +(define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format) + octet-getter) + +(define-char-decoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format) + (when-let (octet octet-getter) (if (> (the octet octet) 127) (recover-from-encoding-error format "No character which corresponds to octet #x~X." octet) octet)))
-(defmethod octets-to-char-code ((format flexi-8-bit-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format) (with-accessors ((decoding-table external-format-decoding-table)) format - (when-let (octet (funcall reader)) + (when-let (octet octet-getter) (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table) (the octet octet)))) (if (or (null char-code) @@ -88,19 +279,17 @@ "No character which corresponds to octet #x~X." octet) char-code)))))
-(defmethod octets-to-char-code ((format flexi-utf-8-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-8 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (let ((octet (read-next-byte))) (declare (type octet octet)) @@ -113,11 +302,7 @@ (values (logand octet #b00001111) 2)) ((= #b11110000 (logand octet #b11111000)) (values (logand octet #b00000111) 3)) - ((= #b11111000 (logand octet #b11111100)) - (values (logand octet #b00000011) 4)) - ((= #b11111100 (logand octet #b11111110)) - (values (logand octet #b00000001) 5)) - (t (return-from octets-to-char-code + (t (return-from char-decoder (recover-from-encoding-error format "Unexpected value #x~X at start of UTF-8 sequence." octet)))) @@ -130,24 +315,22 @@ repeat count for octet of-type octet = (read-next-byte) unless (= #b10000000 (logand octet #b11000000)) - do (return-from octets-to-char-code + do (return-from char-decoder (recover-from-encoding-error format "Unexpected value #x~X in UTF-8 sequence." octet)) finally (return result)))))))
-(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-16 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (flet ((read-next-word () (+ (the octet (read-next-byte)) @@ -159,7 +342,7 @@ (let ((next-word (read-next-word))) (declare (type (unsigned-byte 16) next-word)) (unless (<= #xdc00 next-word #xdfff) - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "Unexpected UTF-16 word #x~X following #x~X." next-word word))) @@ -168,19 +351,17 @@ #x10000))) (t word)))))))
-(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-16 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (flet ((read-next-word () (+ (ash (the octet (read-next-byte)) 8) @@ -192,7 +373,7 @@ (let ((next-word (read-next-word))) (declare (type (unsigned-byte 16) next-word)) (unless (<= #xdc00 next-word #xdfff) - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "Unexpected UTF-16 word #x~X following #x~X." next-word word))) @@ -201,37 +382,33 @@ #x10000))) (t word)))))))
-(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-32 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (loop for count of-type fixnum from 0 to 24 by 8 for octet of-type octet = (read-next-byte) sum (ash octet count)))))
-(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-32 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (loop for count of-type fixnum from 24 downto 0 by 8 for octet of-type octet = (read-next-byte)
Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Sun May 25 16:28:25 2008 @@ -996,7 +996,7 @@
<h4><a name="strings" class=none>Strings</a></h4>
-This section collects a few convenience functions for strings conversions: +This section collects a few convenience functions for strings conversions.
<p><br>[Function] <br><a class=none name="string-to-octets"><b>string-to-octets</b> <i>string <tt>&key</tt> external-format start end</i> => <i>vector</i></a> @@ -1009,7 +1009,9 @@ <code><i>start</i></code> and <code><i>end</i></code> are <code>0</code> and the length of the string. The default for <code><i>external-format</i></code> is <code>:LATIN1</code>. - +<p> +In spite of the name, <code><i>string</i></code> can be any sequence of characters, but +the function is optimized for strings. </blockquote>
<p><br>[Function] @@ -1023,6 +1025,11 @@ <code><i>start</i></code> and <code><i>end</i></code> are <code>0</code> and the length of the sequence. The default for <code><i>external-format</i></code> is <code>:LATIN1</code>. +<p> +This function is optimized for the case +of <code><i>sequence</i></code> being +a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>. +Don't use lists if you are in hurry. </blockquote>
<p><br>[Function] @@ -1030,14 +1037,17 @@
<blockquote><br>
-Returns the length of the substring of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in +Returns the length of the subsequence of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in <a href="#octet">octets</a> if encoded using the <a href="#external-formats">external format</a> designated by <code><i>external-format</i></code>. The defaults for <code><i>start</i></code> and <code><i>end</i></code> -are <code>0</code> and the length of the string. The default +are <code>0</code> and the length of <code><i>string</i></code>. The default for <code><i>external-format</i></code> is <code>:LATIN1</code>. +<p> +In spite of the name, <code><i>string</i></code> can be any sequence of characters, but +the function is optimized for strings. </blockquote>
<p><br>[Function] @@ -1054,6 +1064,11 @@ <code><i>start</i></code> and <code><i>end</i></code> are <code>0</code> and the length of the sequence. The default for <code><i>external-format</i></code> is <code>:LATIN1</code>. +<p> +This function is optimized for the case +of <code><i>sequence</i></code> being +a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>. +Don't use lists if you are in hurry. </blockquote>
<br> <br><h3><a class=none name="position">File positions</a></h3> @@ -1095,7 +1110,7 @@ his work on making FLEXI-STREAMS faster.
<p> -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.114 2008/05/25 03:08:01 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $ <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.21 2008/05/25 20:26:34 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -47,130 +47,140 @@ (:documentation "A generic function which dispatches on the external format and does the real work for STRING-TO-OCTETS."))
+(defmethod string-to-octets* :around (format (list list) start end) + (declare #.*standard-optimize-settings*) + (call-next-method format (coerce list 'string*) start end)) + (defmacro define-sequence-writers ((format-class) &body body) - "Utility macro which defines methods for WRITE-SEQUENCE* and -STRING-TO-OCTET* for the class FORMAT-CLASS. For BODY see the -docstring of DEFINE-CHAR-ENCODERS." - `(progn - (defmethod write-sequence* ((format ,format-class) stream sequence start end) - (declare #.*standard-optimize-settings*) - (declare (fixnum start end)) - (with-accessors ((column flexi-stream-column)) - stream - (let* ((octet-seen-p nil) - (buffer-pos 0) - ;; estimate should be good enough... - (factor (encoding-factor format)) - ;; we don't want arbitrarily large buffer, do we? - (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) - (buffer (make-octet-buffer buffer-size))) - (declare (fixnum buffer-pos buffer-size) - (boolean octet-seen-p) - (type (array octet *) buffer)) - (macrolet ((octet-writer (form) - `(write-octet ,form))) - (labels ((flush-buffer () - "Sends all octets in BUFFER to the underlying stream." - (write-sequence buffer stream :end buffer-pos) - (setq buffer-pos 0)) - (write-octet (octet) - "Adds one octet to the buffer and flushes it if necessary." - (declare (type octet octet)) - (when (>= buffer-pos buffer-size) - (flush-buffer)) - (setf (aref buffer buffer-pos) octet) - (incf buffer-pos)) - (write-object (object) - "Dispatches to WRITE-OCTET or WRITE-CHARACTER -depending on the type of OBJECT." - (etypecase object - (octet (setq octet-seen-p t) - (write-octet object)) - (character (symbol-macrolet ((char-getter object)) - ,@body))))) - (macrolet ((iterate (&body output-forms) - "An unhygienic macro to implement the actual -iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one -sequence element and put its octet representation into the buffer." - `(loop for index of-type fixnum from start below end - do (progn ,@output-forms) - finally (when (plusp buffer-pos) - (flush-buffer))))) - (etypecase sequence - (string (iterate - (symbol-macrolet ((char-getter (char sequence index))) - ,@body))) - (array (iterate - (symbol-macrolet ((char-getter (aref sequence index))) - ,@body))) - (list (iterate (write-object (nth index sequence)))))) - ;; update the column slot, setting it to NIL if we sent - ;; octets - (setq column - (cond (octet-seen-p nil) - (t (let ((last-newline-pos (position #\Newline sequence - :test #'char= - :start start - :end end - :from-end t))) - (cond (last-newline-pos (- end last-newline-pos 1)) - (column (+ column (- end start))))))))))))) - (defmethod string-to-octets* ((format ,format-class) string start end) - (declare #.*standard-optimize-settings*) - (declare (fixnum start end) (string string)) - (let ((octets (make-array (compute-number-of-octets format string start end) - :element-type 'octet)) - (j 0)) - (declare (fixnum j)) - (loop for i of-type fixnum from start below end do - (macrolet ((octet-writer (form) - `(progn - (setf (aref (the (array octet *) octets) j) ,form) - (incf j)))) - (symbol-macrolet ((char-getter (char string i))) - (progn ,@body)))) - octets)))) - -;; char-getter can be called more than once - no side effects -(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body) - "Utility macro which defines several encoding-related methods for -the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where -it is assumed that CR-FORMAT-CLASS is the same encoding as -FORMAT-CLASS but with CR line endings and similar for -CRLF-FORMAT-CLASS. BODY is a code template for the code to convert -one character to octets. BODY must contain a symbol CHAR-GETTER -representing the form which is used to obtain the character and a -forms like (OCTET-WRITE <thing>) to write the octet <thing>. The -CHAR-GETTER form might be called more than once." + "Non-hygienic utility macro which defines methods for +WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS. For +BODY see the docstring of DEFINE-CHAR-ENCODERS." (let ((body `((locally (declare #.*fixnum-optimize-settings*) ,@body)))) `(progn - (defmethod char-to-octets ((format ,format-class) char writer) - (declare (character char) (function writer)) - (symbol-macrolet ((char-getter char)) - (macrolet ((octet-writer (form) - `(funcall writer ,form))) - ,@body))) - (define-sequence-writers (,format-class) ,@body) - (define-sequence-writers (,cr-format-class) - ,@(sublis `((char-getter . ,(with-unique-names (char) - `(let ((,char char-getter)) - (declare (character ,char)) - (if (char= ,char #\Newline) - #\Return - ,char))))) - body)) - (define-sequence-writers (,crlf-format-class) - ,(with-unique-names (char write-char) - `(flet ((,write-char (,char) - ,@(sublis `((char-getter . ,char)) body))) - (let ((,char char-getter)) - (declare (character ,char)) - (cond ((char= ,char #\Newline) - (,write-char #\Return) - (,write-char #\Newline)) - (t (,write-char ,char)))))))))) + (defmethod string-to-octets* ((format ,format-class) string start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end) (string string)) + (let ((octets (make-array (compute-number-of-octets format string start end) + :element-type 'octet)) + (j 0)) + (declare (fixnum j)) + (loop for i of-type fixnum from start below end do + (macrolet ((octet-writer (form) + `(progn + (setf (aref (the (array octet *) octets) j) ,form) + (incf j)))) + (symbol-macrolet ((char-getter (char string i))) + (progn ,@body)))) + octets)) + (defmethod write-sequence* ((format ,format-class) stream sequence start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((column flexi-stream-column)) + stream + (let* ((octet-seen-p nil) + (buffer-pos 0) + ;; estimate should be good enough... + (factor (encoding-factor format)) + ;; we don't want arbitrarily large buffer, do we? + (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) + (buffer (make-octet-buffer buffer-size))) + (declare (fixnum buffer-pos buffer-size) + (boolean octet-seen-p) + (type (array octet *) buffer)) + (macrolet ((octet-writer (form) + `(write-octet ,form))) + (labels ((flush-buffer () + "Sends all octets in BUFFER to the underlying stream." + (write-sequence buffer stream :end buffer-pos) + (setq buffer-pos 0)) + (write-octet (octet) + "Adds one octet to the buffer and flushes it if necessary." + (declare (type octet octet)) + (when (>= buffer-pos buffer-size) + (flush-buffer)) + (setf (aref buffer buffer-pos) octet) + (incf buffer-pos)) + (write-object (object) + "Dispatches to WRITE-OCTET or WRITE-CHARACTER +depending on the type of OBJECT." + (etypecase object + (octet (setq octet-seen-p t) + (write-octet object)) + (character (symbol-macrolet ((char-getter object)) + ,@body))))) + (macrolet ((iterate (&body output-forms) + "An unhygienic macro to implement the actual +iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one +sequence element and put its octet representation into the buffer." + `(loop for index of-type fixnum from start below end + do (progn ,@output-forms) + finally (when (plusp buffer-pos) + (flush-buffer))))) + (etypecase sequence + (string (iterate + (symbol-macrolet ((char-getter (char sequence index))) + ,@body))) + (array (iterate + (symbol-macrolet ((char-getter (aref sequence index))) + ,@body))) + (list (iterate (write-object (nth index sequence)))))) + ;; update the column slot, setting it to NIL if we sent + ;; octets + (setq column + (cond (octet-seen-p nil) + (t (let ((last-newline-pos (position #\Newline sequence + :test #'char= + :start start + :end end + :from-end t))) + (cond (last-newline-pos (- end last-newline-pos 1)) + (column (+ column (- end start)))))))))))))))) + +(defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body) + "Non-hygienic utility macro which defines several encoding-related +methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and +CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same +encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and +similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class. +BODY is a code template for the code to convert one character to +octets. BODY must contain a symbol CHAR-GETTER representing the form +which is used to obtain the character and a forms like (OCTET-WRITE +<thing>) to write the octet <thing>. The CHAR-GETTER form might be +called more than once." + `(progn + (defmethod char-to-octets ((format ,lf-format-class) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (symbol-macrolet ((char-getter char)) + (macrolet ((octet-writer (form) + `(funcall writer ,form))) + ,@body))) + (define-sequence-writers (,lf-format-class) ,@body) + (define-sequence-writers (,cr-format-class) + ;; modify the body so that the getter replaces a #\Newline + ;; with a #\Return + ,@(sublis `((char-getter . ,(with-unique-names (char) + `(let ((,char char-getter)) + (declare (character ,char)) + (if (char= ,char #\Newline) + #\Return + ,char))))) + body)) + (define-sequence-writers (,crlf-format-class) + ;; modify the body so that we potentially write octets for + ;; two characters (#\Return and #\Linefeed) - the original + ;; body is wrapped with the WRITE-CHAR local function + ,(with-unique-names (char write-char) + `(flet ((,write-char (,char) + ,@(sublis `((char-getter . ,char)) body))) + (let ((,char char-getter)) + (declare (character ,char)) + (cond ((char= ,char #\Newline) + (,write-char #\Return) + (,write-char #\Linefeed)) + (t (,write-char ,char)))))))))
(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format) (let ((octet (char-code char-getter)))
Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.77 2008/05/25 03:34:55 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.78 2008/05/25 19:25:44 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -201,9 +201,7 @@ others - see for example FLEXI-STREAMS-TEST::SEQUENCE-TEST." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) - (with-accessors ((position flexi-stream-position) - (bound flexi-stream-bound) - (octet-stack flexi-stream-octet-stack) + (with-accessors ((octet-stack flexi-stream-octet-stack) (external-format flexi-stream-external-format) (last-octet flexi-stream-last-octet) (last-char-code flexi-stream-last-char-code) @@ -233,116 +231,8 @@ (setq last-char-code nil last-octet (elt sequence (1- index)))) (return-from stream-read-sequence index))) - (let* (buffer - (buffer-pos 0) - (buffer-end 0) - (index start) - ;; whether we will later be able to rewind the stream if - ;; needed (to get rid of unused octets in the buffer) - (can-rewind-p (maybe-rewind stream 0)) - (factor (encoding-factor external-format)) - (integer-factor (floor factor)) - ;; it's an interesting question whether it makes sense - ;; performance-wise to make RESERVE significantly bigger - ;; (and thus put potentially a lot more octets into - ;; OCTET-STACK), especially for UTF-8 - (reserve (cond ((not (floatp factor)) 0) - ((not can-rewind-p) (* 2 integer-factor)) - (t (ceiling (* (- factor integer-factor) (- end start))))))) - (declare (fixnum buffer-pos buffer-end index integer-factor reserve) - (boolean can-rewind-p)) - (flet ((compute-fill-amount () - "Computes the amount of octets we can savely read into -the buffer without violating the stream's bound (if there is one) and -without potentially reading much more than we need (unless we can -rewind afterwards)." - (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor - (the fixnum (- end index)))) - reserve)) - +buffer-size+))) - (cond (bound (min minimum (- bound position))) - (t minimum)))) - (fill-buffer (end) - "Tries to fill the buffer from BUFFER-POS to END and -returns NIL if the buffer doesn't contain any new data." - ;; put data from octet stack into buffer if there is any - (loop - (when (>= buffer-pos end) - (return)) - (let ((next-octet (pop octet-stack))) - (cond (next-octet - (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet)) - (incf buffer-pos)) - (t (return))))) - (setq buffer-end (read-sequence buffer stream - :start buffer-pos - :end end)) - ;; BUFFER-POS is only greater than zero if the buffer - ;; already contains unread data from the octet stack - ;; (see below), so we test for ZEROP here and do /not/ - ;; compare with BUFFER-POS - (unless (zerop buffer-end) - (incf position buffer-end)))) - (let ((minimum (compute-fill-amount))) - (declare (fixnum minimum)) - (setq buffer (make-octet-buffer minimum)) - ;; fill buffer for the first time or return immediately if - ;; we don't succeed - (unless (fill-buffer minimum) - (return-from stream-read-sequence start))) - (setq buffer-pos 0) - (flet ((next-octet () - "Returns the next octet from the buffer and fills it -if it is exhausted. Returns NIL if there's no more data on the -stream." - (when (>= buffer-pos buffer-end) - (setq buffer-pos 0) - (unless (fill-buffer (compute-fill-amount)) - (return-from next-octet))) - (prog1 - (aref (the (array octet *) buffer) buffer-pos) - (incf buffer-pos))) - (unreader (char) - (unread-char% char flexi-input-stream))) - (declare (dynamic-extent (function next-octet) (function unreader))) - (let ((*current-unreader* #'unreader)) - (macrolet ((iterate (set-place) - "A very unhygienic macro to implement the -actual iteration through the sequence including housekeeping for the -flexi stream. SET-PLACE is the place (using the index INDEX) used to -access the sequence." - `(flet ((leave () - "This is the function used to abort -the LOOP iteration below." - (when (> index start) - (setq last-octet nil - last-char-code ,(sublis '((index . (1- index))) set-place))) - (return-from stream-read-sequence index))) - (loop - (when (>= index end) - ;; check if there are octets in the - ;; buffer we didn't use - see - ;; COMPUTE-FILL-AMOUNT above - (let ((rest (- buffer-end buffer-pos))) - (when (plusp rest) - (or (and can-rewind-p - (maybe-rewind stream rest)) - (loop - (when (>= buffer-pos buffer-end) - (return)) - (decf buffer-end) - (push (aref (the (array octet *) buffer) buffer-end) - octet-stack))))) - (leave)) - (let ((next-char-code (octets-to-char-code external-format #'next-octet))) - (unless next-char-code - (leave)) - (setf ,set-place (code-char next-char-code)) - (incf index)))))) - (etypecase sequence - (string (iterate (char sequence index))) - (array (iterate (aref sequence index))) - (list (iterate (nth index sequence))))))))))) + ;; otherwise hand over to the external format to do the work + (read-sequence* external-format flexi-input-stream sequence start end)))
(defmethod stream-unread-char ((stream flexi-input-stream) char) "Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM.
Modified: branches/edi/length.lisp ============================================================================== --- branches/edi/length.lisp (original) +++ branches/edi/length.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -115,7 +115,7 @@ ;; formats with CRLF line endings have their own specialized methods ;; below (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (declare (ignore warnp)) (let ((i start) (length (- end start))) @@ -132,7 +132,7 @@
(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start)) (declare (fixnum i sum)) @@ -152,7 +152,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start) (last-octet 0)) @@ -175,7 +175,7 @@
(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (declare (ignore sequence)) (when (and warnp (oddp (- end start))) (signal-encoding-warning format "~A octet~:P cannot be decoded ~ @@ -203,7 +203,7 @@
(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start)) (declare (fixnum i sum)) @@ -222,7 +222,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start) (last-octet 0)) @@ -248,7 +248,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start) (last-octet 0)) @@ -290,7 +290,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) @@ -308,7 +308,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) @@ -330,22 +330,26 @@ encode the sequence of characters in SEQUENCE from START to END using the external format FORMAT."))
-(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end) +(defmethod compute-number-of-octets :around (format (list list) start end) + (declare #.*standard-optimize-settings*) + (call-next-method format (coerce list 'string*) start end)) + +(defmethod compute-number-of-octets ((format flexi-8-bit-format) string start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence)) + (declare (ignore string)) (- end start))
-(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-utf-8-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((< char-code #x80) 1) ((< char-code #x800) 2) ((< char-code #x10000) 3) @@ -355,16 +359,16 @@ (incf i))) sum))
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((= char-code #.(char-code #\Newline)) 2) ((< char-code #x80) 1) ((< char-code #x800) 2) @@ -375,16 +379,16 @@ (incf i))) sum))
-(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-utf-16-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((< char-code #x10000) 2) (t 4)))) (declare (fixnum char-length) (type char-code-integer char-code)) @@ -392,16 +396,16 @@ (incf i))) sum))
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((= char-code #.(char-code #\Newline)) 4) ((< char-code #x10000) 2) (t 4)))) @@ -410,16 +414,16 @@ (incf i))) sum))
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((= char-code #.(char-code #\Newline)) 4) ((< char-code #x10000) 2) (t 4)))) @@ -428,17 +432,39 @@ (incf i))) sum))
-(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-utf-32-format) string start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence)) + (declare (ignore string)) (* 4 (- end start)))
-(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end) +(defmethod compute-number-of-octets ((format flexi-crlf-mixin) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (+ (call-next-method) (* (case (external-format-name format) (:utf-32 4) (otherwise 1)) - (count #\Newline sequence :start start :end end :test #'char=)))) \ No newline at end of file + (count #\Newline string :start start :end end :test #'char=)))) + +(defgeneric character-length (format char) + (declare #.*fixnum-optimize-settings*) + (:documentation "Returns the number of octets needed to encode the +single character CHAR.") + (:method (format char) + (compute-number-of-octets format (string char) 0 1))) + +(defmethod character-length :around ((format flexi-crlf-mixin) (char (eql #\Newline))) + (declare #.*fixnum-optimize-settings*) + (+ (call-next-method format +cr+) + (call-next-method format +lf+))) + +(defmethod character-length ((format flexi-8-bit-format) char) + (declare #.*fixnum-optimize-settings*) + (declare (ignore char)) + 1) + +(defmethod character-length ((format flexi-utf-32-format) char) + (declare #.*fixnum-optimize-settings*) + (declare (ignore char)) + 4) \ No newline at end of file
Modified: branches/edi/mapping.lisp ============================================================================== --- branches/edi/mapping.lisp (original) +++ branches/edi/mapping.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.2 2008/05/20 21:15:45 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.3 2008/05/25 19:07:53 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -39,6 +39,12 @@ #+:lispworks 'lw:simple-char #-:lispworks 'character)
+(deftype string* () + "Convenience shortcut to paper over the difference between LispWorks +and the other Lisps." + #+:lispworks 'lw:text-string + #-:lispworks 'string) + (deftype char-code-integer () "The subtype of integers which can be returned by the function CHAR-CODE." '(integer 0 #.(1- char-code-limit)))
Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.29 2008/05/25 03:34:55 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -34,7 +34,10 @@ (start 0) (end (length string))) "Converts the Lisp string STRING from START to END to an array of octets corresponding to the external format designated by -EXTERNAL-FORMAT." +EXTERNAL-FORMAT. + +In spite of the name, STRING can be any sequence of characters, but +the function is optimized for strings." (declare #.*standard-optimize-settings*) (declare (string string)) (setq external-format (maybe-convert-external-format external-format)) @@ -45,51 +48,22 @@ (external-format :latin1) (start 0) (end (length sequence))) "Converts the Lisp sequence SEQUENCE of octets from START to END to -a string using the external format designated by EXTERNAL-FORMAT." +a string using the external format designated by EXTERNAL-FORMAT. + +This function is optimized for the case of SEQUENCE being a vector. +Don't use lists if you're in a hurry." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format)) - (let* ((i start) - (reader (etypecase sequence - ((array octet *) - (lambda () - (and (< i end) - (prog1 - (aref (the (array octet *) sequence) i) - (incf i))))) - ((array * *) - (lambda () - (and (< i end) - (prog1 - (aref sequence i) - (incf i))))) - (list - (lambda () - (and (< i end) - (prog1 - (nth i sequence) - (incf i)))))))) - (declare (fixnum i) (dynamic-extent reader)) - (labels ((pseudo-writer (octet) - (declare (ignore octet)) - (decf i)) - (unreader (char) - (char-to-octets external-format char #'pseudo-writer))) - (declare (dynamic-extent (function pseudo-writer) (function unreader))) - (let ((*current-unreader* #'unreader)) - (flet ((next-char () - (code-char (octets-to-char-code external-format reader)))) - (declare (inline next-char)) - (let* ((string-length (compute-number-of-chars external-format sequence start end nil)) - (string (make-array string-length :element-type 'char*))) - (declare (fixnum string-length)) - (loop for j of-type fixnum from 0 below string-length - do (setf (schar string j) (next-char)) - finally (return string)))))))) + ;; the external format knows how to do it... + (octets-to-string* external-format sequence start end))
(defun octet-length (string &key (external-format :latin1) (start 0) (end (length string))) "Returns the length of the substring of STRING from START to END in -octets if encoded using the external format EXTERNAL-FORMAT." +octets if encoded using the external format EXTERNAL-FORMAT. + +In spite of the name, STRING can be any sequence of characters, but +the function is optimized for strings." (declare #.*standard-optimize-settings*) (declare (fixnum start end) (string string)) (setq external-format (maybe-convert-external-format external-format)) @@ -98,7 +72,10 @@ (defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence))) "Kind of the inverse of OCTET-LENGTH. Returns the length of the subsequence (of octets) of SEQUENCE from START to END in characters -if decoded using the external format EXTERNAL-FORMAT." +if decoded using the external format EXTERNAL-FORMAT. + +This function is optimized for the case of SEQUENCE being a vector. +Don't use lists if you're in a hurry." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format))