Author: hhubner Date: Thu May 1 02:31:46 2008 New Revision: 4
Modified: branches/hans/input.lisp branches/hans/stream.lisp branches/hans/strings.lisp Log: commit first set of changes to speed up octets-to-string
Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Thu May 1 02:31:46 2008 @@ -242,34 +242,78 @@ (decf position) (push #.(char-code #\Return) octet-stack)))))
+(defun buffer-code-char (buffer char-code) + "Given a BUFFER, which is assumed to be a + to-string-conversion-buffer (see strings.lisp) and a character + code, convert to a character and perform newline processing for the + stream if the character is a #\Return. This code basically repeats + what the stream-read-char ((stream flexi-cr-mixin)) does, but it + does so in an optimized manner to make octet->string conversion + faster." + (declare (optimize speed (safety 0)) + (type to-string-conversion-buffer buffer) + (type fixnum char-code)) + (let ((char (code-char char-code))) + (if (eql char #\Return) + (case (tscb-eol-style buffer) + (:cr + #\Newline) + (:crlf + (cond + ((= (tscb-position buffer) (tscb-end buffer)) + :eof) + ((eql #.(char-code #\Newline) (aref (tscb-vector buffer) (tscb-position buffer))) + (incf (tscb-position buffer)) + #\Newline) + (t + #\Return))) + (t + #\Return)) + char))) +(declaim (inline buffer-code-char)) + (defmacro define-char-reader ((stream-var stream-class) &body body) - "Helper macro to define methods for STREAM-READ-CHAR. Defines a -method for the class STREAM-CLASS using the variable STREAM-VAR and -the code body BODY wrapped with some standard code common to all -methods defined here. The return value of BODY is a character code. -In case of encoding problems, BODY must return the value returned by -(RECOVER-FROM-ENCODING-ERROR ...)." - (with-unique-names (char-code body-fn) - `(defmethod stream-read-char ((,stream-var ,stream-class)) - "This method was generated with the DEFINE-CHAR-READER macro." - (declare (optimize speed)) - ;; note that we do nothing for the :LF EOL style because we - ;; assume that #\Newline is the same as #\Linefeed in all - ;; Lisps which will use this library - (with-accessors ((last-octet flexi-stream-last-octet) - (last-char-code flexi-stream-last-char-code)) - ,stream-var - ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after - ;; this operation - (setq last-octet nil) - (let ((,char-code - (flet ((,body-fn () ,@body)) - (declare (inline ,body-fn) (dynamic-extent (function ,body-fn))) - (,body-fn)))) - ;; remember this character and the current external format - ;; for UNREAD-CHAR - (setq last-char-code ,char-code) - (or (code-char ,char-code) ,char-code)))))) + "Helper macro to define methods for STREAM-READ-CHAR and +BUFFER-READ-CHAR. Defines a method for the class STREAM-CLASS using +the variable STREAM-VAR and the code body BODY wrapped with some +standard code common to all methods defined here. The return value of +BODY is a character code. In case of encoding problems, BODY must +return the value returned by (RECOVER-FROM-ENCODING-ERROR ...). In +addition, a method on BUFFER-READ-CHAR is defined with the first +argument being the buffer, the second argument a STREAM-CLASS instance +used only for dispatching. The BUFFER-READ-CHAR generic function is +used to shortcut through the streams mechanic from the +OCTETS-TO-STRING function." + (with-unique-names (char-code body-fn dummy-stream) + (let ((body body)) + `(progn + (defmethod stream-read-char ((,stream-var ,stream-class)) + "This method was generated with the DEFINE-CHAR-READER macro." + (declare (optimize speed)) + ;; note that we do nothing for the :LF EOL style because we + ;; assume that #\Newline is the same as #\Linefeed in all + ;; Lisps which will use this library + (with-accessors ((last-octet flexi-stream-last-octet) + (last-char-code flexi-stream-last-char-code)) + ,stream-var + ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after + ;; this operation + (setq last-octet nil) + (let ((,char-code + (flet ((,body-fn () ,@body)) + (declare (inline ,body-fn) (dynamic-extent (function ,body-fn))) + (,body-fn)))) + ;; remember this character and the current external format + ;; for UNREAD-CHAR + (setq last-char-code ,char-code) + (or (code-char ,char-code) ,char-code)))) + (defmethod buffer-read-char (,stream-var (,dummy-stream ,stream-class)) + (declare (optimize speed)) + (declare (ignore ,dummy-stream)) ; used only for dispatch + (block stream-read-char ;; for RETURN-FROM in BODY + (let ((,char-code (progn ,@body))) + (declare (type fixnum ,char-code)) + (or (buffer-code-char ,stream-var ,char-code) ,char-code))))))))
(defun recover-from-encoding-error (flexi-stream format-control &rest format-args) "Helper function used by the STREAM-READ-CHAR methods below to deal @@ -582,4 +626,4 @@ (t (= octet peek-type))) finally (unless (eql octet eof-value) (unread-byte octet flexi-input-stream)) - (return octet))) \ No newline at end of file + (return octet)))
Modified: branches/hans/stream.lisp ============================================================================== --- branches/hans/stream.lisp (original) +++ branches/hans/stream.lisp Thu May 1 02:31:46 2008 @@ -509,46 +509,50 @@ ;; http://thread.gmane.org/gmane.lisp.lispworks.general/6269 (set-class stream))
+(defun input-stream-class-name (external-format) + "Given an EXTERNAL-FORMAT, return the flexi-stream class name that + needs to be used for reading such encoded data. Returns the class' + name (a symbol)." + (declare (optimize speed)) + (let ((external-format-name (external-format-name external-format)) + (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) + (cond ((ascii-name-p external-format-name) + (if external-format-cr + 'flexi-cr-ascii-input-stream + 'flexi-ascii-input-stream)) + ((eq external-format-name :iso-8859-1) + (if external-format-cr + 'flexi-cr-latin-1-input-stream + 'flexi-latin-1-input-stream)) + ((or (koi8-r-name-p external-format-name) + (iso-8859-name-p external-format-name) + (code-page-name-p external-format-name)) + (if external-format-cr + 'flexi-cr-8-bit-input-stream + 'flexi-8-bit-input-stream)) + (t (case external-format-name + (:utf-8 (if external-format-cr + 'flexi-cr-utf-8-input-stream + 'flexi-utf-8-input-stream)) + (:utf-16 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-16-le-input-stream + 'flexi-cr-utf-16-be-input-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-16-le-input-stream + 'flexi-utf-16-be-input-stream))) + (:utf-32 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-32-le-input-stream + 'flexi-cr-utf-32-be-input-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-32-le-input-stream + 'flexi-utf-32-be-input-stream)))))))) + (defmethod set-class ((stream flexi-input-stream)) "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-input-stream - 'flexi-ascii-input-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-input-stream - 'flexi-latin-1-input-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-input-stream - 'flexi-8-bit-input-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-input-stream - 'flexi-utf-8-input-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-input-stream - 'flexi-cr-utf-16-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-input-stream - 'flexi-utf-16-be-input-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-input-stream - 'flexi-cr-utf-32-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-input-stream - 'flexi-utf-32-be-input-stream)))))))))) + (change-class stream + (input-stream-class-name (flexi-stream-external-format stream))))
(defmethod set-class ((stream flexi-output-stream)) "Changes the actual class of STREAM depending on its external format."
Modified: branches/hans/strings.lisp ============================================================================== --- branches/hans/strings.lisp (original) +++ branches/hans/strings.lisp Thu May 1 02:31:46 2008 @@ -38,19 +38,58 @@ (let ((flexi (make-flexi-stream out :external-format external-format))) (write-string string flexi :start start :end end))))
+;; TO-STRING-CONVERSION-BUFFER structures are used for fast conversion +;; of octets to strings, circumventing streams. + +(defstruct (to-string-conversion-buffer + (:conc-name tscb-)) + (vector nil :type (simple-array (unsigned-byte 8) *)) + (position nil :type fixnum) + (end nil :type fixnum) + (eol-style nil :type (or null symbol))) + +(defmethod read-byte* ((to-string-conversion-buffer to-string-conversion-buffer)) + (declare (optimize speed (safety 0))) + (let ((position (tscb-position to-string-conversion-buffer))) + (when (< position (tscb-end to-string-conversion-buffer)) + (prog1 + (aref (tscb-vector to-string-conversion-buffer) position) + (incf (tscb-position to-string-conversion-buffer)))))) + (defun octets-to-string (vector &key (external-format (make-external-format :latin1)) (start 0) (end (length vector))) "Converts the Lisp vector VECTOR of octets from START to END to string using the external format EXTERNAL-FORMAT." + (declare (optimize speed (safety 0))) + (declare (type (simple-array (unsigned-byte 8) *) vector) + (type fixnum start end)) + (let ((buffer (make-to-string-conversion-buffer :vector vector + :position start + :end end + :eol-style (external-format-eol-style external-format))) + (dummy-input-stream (make-flexi-stream (make-string-input-stream "") :external-format external-format)) + (string (make-array (the fixnum (- end start)) :element-type 'character :fill-pointer 0))) + (declare (type (array character (*)) string)) + (do ((char (buffer-read-char buffer dummy-input-stream) + (buffer-read-char buffer dummy-input-stream))) + ((eql char :eof) + string) + (vector-push char string)))) + +(defun octets-to-string* (vector &key (external-format (make-external-format :latin1)) + (start 0) (end (length vector))) + "Converts the Lisp vector VECTOR of octets from START to END to +string using the external format EXTERNAL-FORMAT." + ;; This version of OCTETS-TO-STRING is here so that one can do speed + ;; comparisons. It should be significantly slower than the version + ;; above. (declare (optimize speed)) (with-input-from-sequence (in vector :start start :end end) (let ((flexi (make-flexi-stream in :external-format external-format)) (result (make-array (- end start) :element-type #+:lispworks 'lw:simple-char - #-:lispworks 'character + #-:lispworks 'character :fill-pointer t))) (setf (fill-pointer result) (read-sequence result flexi)) result))) - -
flexi-streams-cvs@common-lisp.net