Author: eweitz Date: Sun May 25 08:26:47 2008 New Revision: 57
Added: branches/edi/length.lisp (contents, props changed) Modified: branches/edi/decode.lisp branches/edi/encode.lisp branches/edi/external-format.lisp branches/edi/flexi-streams.asd Log: Re-org
Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sun May 25 08:26:47 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.20 2008/05/25 03:25:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,256 +29,6 @@
(in-package :flexi-streams)
-(defgeneric compute-number-of-chars (format sequence start end warnp) - (declare #.*standard-optimize-settings*) - (:documentation "Computes the exact number of characters required to -decode the sequence of octets in SEQUENCE from START to END using the -external format FORMAT. If WARNP is NIL, warnings will be muffled.")) - -(defmethod compute-number-of-chars :around (format (list list) start end warnp) - (declare #.*standard-optimize-settings*) - (call-next-method format (coerce list 'vector) start end warnp)) - -(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence warnp)) - (- end start)) - -(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp) - ;; this method only applies to the 8-bit formats as all other - ;; formats with CRLF line endings have their own specialized methods - ;; below - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore warnp)) - (let ((i start) - (length (- end start))) - (declare (fixnum i length)) - (loop - (when (>= i end) - (return)) - (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=))) - (unless position - (return)) - (setq i (1+ position)) - (decf length))) - length)) - -(defgeneric check-end (format start end i warnp) - (declare #.*fixnum-optimize-settings*) - (:method (format start end i warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end i)) - (when (and warnp (> i end)) - (signal-encoding-warning format "These ~A octet~:P can't be ~ -decoded using ~A as the sequence is too short. ~A octet~:P missing ~ -at then end." - (- end start) - (external-format-name format) - (- i end)))) - (:method ((format flexi-utf-16-format) start end i warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end i)) - (declare (ignore i warnp)) - ;; don't warn twice - (when (evenp (- end start)) - (call-next-method)))) - -(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((octet (aref sequence i)) - (length (cond ((not (logbitp 7 octet)) 1) - ((= #b11000000 (logand octet #b11100000)) 2) - ((= #b11100000 (logand octet #b11110000)) 3) - (t 4)))) - (declare (fixnum length) (type octet octet)) - (incf sum) - (incf i length))) - (check-end format start end i warnp) - sum)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start) - (last-octet 0)) - (declare (fixnum i sum) (type octet last-octet)) - (loop - (when (>= i end) - (return)) - (let* ((octet (aref sequence i)) - (length (cond ((not (logbitp 7 octet)) 1) - ((= #b11000000 (logand octet #b11100000)) 2) - ((= #b11100000 (logand octet #b11110000)) 3) - (t 4)))) - (declare (fixnum length) (type octet octet)) - (unless (and (= octet +lf+) (= last-octet +cr+)) - (incf sum)) - (incf i length) - (setq last-octet octet))) - (check-end format start end i warnp) - sum)) - -(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence)) - (when (and warnp (oddp (- end start))) - (signal-encoding-warning format "~A octet~:P cannot be decoded ~ -using UTF-16 as ~:*~A is not even." - (- end start)))) - -(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (decf end 2) - (loop - (when (> i end) - (return)) - (let* ((high-octet (aref sequence (1+ i))) - (length (cond ((<= #xd8 high-octet #xdf) 4) - (t 2)))) - (declare (fixnum length) (type octet high-octet)) - (incf sum) - (incf i length))) - (check-end format start (+ end 2) i warnp) - sum)) - -(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (decf end 2) - (loop - (when (> i end) - (return)) - (let* ((high-octet (aref sequence i)) - (length (cond ((<= #xd8 high-octet #xdf) 4) - (t 2)))) - (declare (fixnum length) (type octet high-octet)) - (incf sum) - (incf i length))) - (check-end format start (+ end 2) i warnp) - sum)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start) - (last-octet 0)) - (declare (fixnum i sum) (type octet last-octet)) - (decf end 2) - (loop - (when (> i end) - (return)) - (let* ((high-octet (aref sequence (1+ i))) - (length (cond ((<= #xd8 high-octet #xdf) 4) - (t 2)))) - (declare (fixnum length) (type octet high-octet)) - (unless (and (zerop high-octet) - (= (the octet (aref sequence i)) +lf+) - (= last-octet +cr+)) - (incf sum)) - (setq last-octet (if (zerop high-octet) - (aref sequence i) - 0)) - (incf i length))) - (check-end format start (+ end 2) i warnp) - sum)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start) - (last-octet 0)) - (declare (fixnum i sum) (type octet last-octet)) - (decf end 2) - (loop - (when (> i end) - (return)) - (let* ((high-octet (aref sequence i)) - (length (cond ((<= #xd8 high-octet #xdf) 4) - (t 2)))) - (declare (fixnum length) (type octet high-octet)) - (unless (and (zerop high-octet) - (= (the octet (aref sequence (1+ i))) +lf+) - (= last-octet +cr+)) - (incf sum)) - (setq last-octet (if (zerop high-octet) - (aref sequence (1+ i)) - 0)) - (incf i length))) - (check-end format start (+ end 2) i warnp) - sum)) - -(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence)) - (let ((length (- end start))) - (when (and warnp (plusp (mod length 4))) - (signal-encoding-warning format "~A octet~:P cannot be decoded ~ -using UTF-32 as ~:*~A is not a multiple-value of four." - length)))) - -(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence warnp)) - (ceiling (- end start) 4)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore warnp)) - (let ((i start) - (length (ceiling (- end start) 4))) - (decf end 8) - (loop - (when (> i end) - (return)) - (cond ((loop for j of-type fixnum from i - for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0) - always (= octet (aref sequence j))) - (decf length) - (incf i 8)) - (t (incf i 4)))) - length)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore warnp)) - (let ((i start) - (length (ceiling (- end start) 4))) - (decf end 8) - (loop - (when (> i end) - (return)) - (cond ((loop for j of-type fixnum from i - for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+) - always (= octet (aref sequence j))) - (decf length) - (incf i 8)) - (t (incf i 4)))) - length)) - (defun recover-from-encoding-error (external-format format-control &rest format-args) "Helper function used by OCTETS-TO-CHAR-CODE below to deal with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and returns
Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sun May 25 08:26:47 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.17 2008/05/25 03:25:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,125 +29,6 @@
(in-package :flexi-streams)
-(defgeneric compute-number-of-octets (format sequence start end) - (declare #.*standard-optimize-settings*) - (:documentation "Computes the exact number of octets required to -encode the sequence of characters in SEQUENCE from START to END using -the external format FORMAT.")) - -(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence)) - (- end start)) - -(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((< char-code #x80) 1) - ((< char-code #x800) 2) - ((< char-code #x10000) 3) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((= char-code #.(char-code #\Newline)) 2) - ((< char-code #x80) 1) - ((< char-code #x800) 2) - ((< char-code #x10000) 3) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((< char-code #x10000) 2) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((= char-code #.(char-code #\Newline)) 4) - ((< char-code #x10000) 2) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((= char-code #.(char-code #\Newline)) 4) - ((< char-code #x10000) 2) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence)) - (* 4 (- end start))) - -(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (+ (call-next-method) - (* (case (external-format-name format) - (:utf-32 4) - (otherwise 1)) - (count #\Newline sequence :start start :end end :test #'char=)))) - (defgeneric char-to-octets (format char writer) (declare #.*standard-optimize-settings*) (:documentation "Converts the character CHAR to a sequence of octets
Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Sun May 25 08:26:47 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.22 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -387,46 +387,3 @@ NORMALIZE-EXTERNAL-FORMAT." (print-unreadable-object (object stream :type t :identity t) (prin1 (normalize-external-format object) stream))) - -(defgeneric encoding-factor (format) - (:documentation "Given an external format FORMAT, returns a factor -which denotes the octets to characters ratio to expect when -encoding/decoding. If the returned value is an integer, the factor is -assumed to be exact. If it is a (double) float, the factor is -supposed to be based on heuristics and usually not exact. - -This factor is used in string.lisp.") - (declare #.*standard-optimize-settings*)) - -(defmethod encoding-factor ((format flexi-8-bit-format)) - (declare #.*standard-optimize-settings*) - ;; 8-bit encodings map octets to characters in an exact one-to-one - ;; fashion - 1) - -(defmethod encoding-factor ((format flexi-utf-8-format)) - (declare #.*standard-optimize-settings*) - ;; UTF-8 characters can be anything from one to six octets, but we - ;; assume that the "overhead" is only about 5 percent - this - ;; estimate is obviously very much dependant on the content - 1.05d0) - -(defmethod encoding-factor ((format flexi-utf-16-format)) - (declare #.*standard-optimize-settings*) - ;; usually one character maps to two octets, but characters with - ;; code points above #x10000 map to four octets - we assume that we - ;; usually don't see these characters but of course have to return a - ;; float - 2.0d0) - -(defmethod encoding-factor ((format flexi-utf-32-format)) - (declare #.*standard-optimize-settings*) - ;; UTF-32 always matches every character to four octets - 4) - -(defmethod encoding-factor ((format flexi-crlf-mixin)) - (declare #.*standard-optimize-settings*) - ;; if the sequence #\Return #\Linefeed is the line-end marker, this - ;; obviously makes encodings potentially longer and definitely makes - ;; the estimate unexact - (* 1.02d0 (call-next-method)))
Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Sun May 25 08:26:47 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.69 2008/05/23 14:56:46 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -47,6 +47,7 @@ (:file "util") (:file "conditions") (:file "external-format") + (:file "length") (:file "encode") (:file "decode") (:file "in-memory")
Added: branches/edi/length.lisp ============================================================================== --- (empty file) +++ branches/edi/length.lisp Sun May 25 08:26:47 2008 @@ -0,0 +1,444 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defgeneric encoding-factor (format) + (:documentation "Given an external format FORMAT, returns a factor +which denotes the octets to characters ratio to expect when +encoding/decoding. If the returned value is an integer, the factor is +assumed to be exact. If it is a (double) float, the factor is +supposed to be based on heuristics and usually not exact. + +This factor is used in string.lisp.") + (declare #.*standard-optimize-settings*)) + +(defmethod encoding-factor ((format flexi-8-bit-format)) + (declare #.*standard-optimize-settings*) + ;; 8-bit encodings map octets to characters in an exact one-to-one + ;; fashion + 1) + +(defmethod encoding-factor ((format flexi-utf-8-format)) + (declare #.*standard-optimize-settings*) + ;; UTF-8 characters can be anything from one to six octets, but we + ;; assume that the "overhead" is only about 5 percent - this + ;; estimate is obviously very much dependant on the content + 1.05d0) + +(defmethod encoding-factor ((format flexi-utf-16-format)) + (declare #.*standard-optimize-settings*) + ;; usually one character maps to two octets, but characters with + ;; code points above #x10000 map to four octets - we assume that we + ;; usually don't see these characters but of course have to return a + ;; float + 2.0d0) + +(defmethod encoding-factor ((format flexi-utf-32-format)) + (declare #.*standard-optimize-settings*) + ;; UTF-32 always matches every character to four octets + 4) + +(defmethod encoding-factor ((format flexi-crlf-mixin)) + (declare #.*standard-optimize-settings*) + ;; if the sequence #\Return #\Linefeed is the line-end marker, this + ;; obviously makes encodings potentially longer and definitely makes + ;; the estimate unexact + (* 1.02d0 (call-next-method))) + +(defgeneric check-end (format start end i warnp) + (declare #.*fixnum-optimize-settings*) + (:documentation "Helper function used below to determine if we tried +to read past the end of the sequence.") + (:method (format start end i warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end i)) + (when (and warnp (> i end)) + (signal-encoding-warning format "These ~A octet~:P can't be ~ +decoded using ~A as the sequence is too short. ~A octet~:P missing ~ +at then end." + (- end start) + (external-format-name format) + (- i end)))) + (:method ((format flexi-utf-16-format) start end i warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end i)) + (declare (ignore i warnp)) + ;; don't warn twice + (when (evenp (- end start)) + (call-next-method)))) + +(defgeneric compute-number-of-chars (format sequence start end warnp) + (declare #.*standard-optimize-settings*) + (:documentation "Computes the exact number of characters required to +decode the sequence of octets in SEQUENCE from START to END using the +external format FORMAT. If WARNP is NIL, warnings will be muffled.")) + +(defmethod compute-number-of-chars :around (format (list list) start end warnp) + (declare #.*standard-optimize-settings*) + (call-next-method format (coerce list 'vector) start end warnp)) + +(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence warnp)) + (- end start)) + +(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp) + ;; this method only applies to the 8-bit formats as all other + ;; formats with CRLF line endings have their own specialized methods + ;; below + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore warnp)) + (let ((i start) + (length (- end start))) + (declare (fixnum i length)) + (loop + (when (>= i end) + (return)) + (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=))) + (unless position + (return)) + (setq i (1+ position)) + (decf length))) + length)) + +(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((octet (aref sequence i)) + (length (cond ((not (logbitp 7 octet)) 1) + ((= #b11000000 (logand octet #b11100000)) 2) + ((= #b11100000 (logand octet #b11110000)) 3) + (t 4)))) + (declare (fixnum length) (type octet octet)) + (incf sum) + (incf i length))) + (check-end format start end i warnp) + sum)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start) + (last-octet 0)) + (declare (fixnum i sum) (type octet last-octet)) + (loop + (when (>= i end) + (return)) + (let* ((octet (aref sequence i)) + (length (cond ((not (logbitp 7 octet)) 1) + ((= #b11000000 (logand octet #b11100000)) 2) + ((= #b11100000 (logand octet #b11110000)) 3) + (t 4)))) + (declare (fixnum length) (type octet octet)) + (unless (and (= octet +lf+) (= last-octet +cr+)) + (incf sum)) + (incf i length) + (setq last-octet octet))) + (check-end format start end i warnp) + sum)) + +(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (when (and warnp (oddp (- end start))) + (signal-encoding-warning format "~A octet~:P cannot be decoded ~ +using UTF-16 as ~:*~A is not even." + (- end start)))) + +(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (decf end 2) + (loop + (when (> i end) + (return)) + (let* ((high-octet (aref sequence (1+ i))) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (type octet high-octet)) + (incf sum) + (incf i length))) + (check-end format start (+ end 2) i warnp) + sum)) + +(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (decf end 2) + (loop + (when (> i end) + (return)) + (let* ((high-octet (aref sequence i)) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (type octet high-octet)) + (incf sum) + (incf i length))) + (check-end format start (+ end 2) i warnp) + sum)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start) + (last-octet 0)) + (declare (fixnum i sum) (type octet last-octet)) + (decf end 2) + (loop + (when (> i end) + (return)) + (let* ((high-octet (aref sequence (1+ i))) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (type octet high-octet)) + (unless (and (zerop high-octet) + (= (the octet (aref sequence i)) +lf+) + (= last-octet +cr+)) + (incf sum)) + (setq last-octet (if (zerop high-octet) + (aref sequence i) + 0)) + (incf i length))) + (check-end format start (+ end 2) i warnp) + sum)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start) + (last-octet 0)) + (declare (fixnum i sum) (type octet last-octet)) + (decf end 2) + (loop + (when (> i end) + (return)) + (let* ((high-octet (aref sequence i)) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (type octet high-octet)) + (unless (and (zerop high-octet) + (= (the octet (aref sequence (1+ i))) +lf+) + (= last-octet +cr+)) + (incf sum)) + (setq last-octet (if (zerop high-octet) + (aref sequence (1+ i)) + 0)) + (incf i length))) + (check-end format start (+ end 2) i warnp) + sum)) + +(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (let ((length (- end start))) + (when (and warnp (plusp (mod length 4))) + (signal-encoding-warning format "~A octet~:P cannot be decoded ~ +using UTF-32 as ~:*~A is not a multiple-value of four." + length)))) + +(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence warnp)) + (ceiling (- end start) 4)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore warnp)) + (let ((i start) + (length (ceiling (- end start) 4))) + (decf end 8) + (loop + (when (> i end) + (return)) + (cond ((loop for j of-type fixnum from i + for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0) + always (= octet (aref sequence j))) + (decf length) + (incf i 8)) + (t (incf i 4)))) + length)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore warnp)) + (let ((i start) + (length (ceiling (- end start) 4))) + (decf end 8) + (loop + (when (> i end) + (return)) + (cond ((loop for j of-type fixnum from i + for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+) + always (= octet (aref sequence j))) + (decf length) + (incf i 8)) + (t (incf i 4)))) + length)) + +(defgeneric compute-number-of-octets (format sequence start end) + (declare #.*standard-optimize-settings*) + (:documentation "Computes the exact number of octets required to +encode the sequence of characters in SEQUENCE from START to END using +the external format FORMAT.")) + +(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (- end start)) + +(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((< char-code #x80) 1) + ((< char-code #x800) 2) + ((< char-code #x10000) 3) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 2) + ((< char-code #x80) 1) + ((< char-code #x800) 2) + ((< char-code #x10000) 3) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 4) + ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 4) + ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (* 4 (- end start))) + +(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (+ (call-next-method) + (* (case (external-format-name format) + (:utf-32 4) + (otherwise 1)) + (count #\Newline sequence :start start :end end :test #'char=)))) \ No newline at end of file