Author: hhubner Date: Thu May 1 09:41:05 2008 New Revision: 5
Added: branches/hans/test-speed.lisp Modified: branches/hans/flexi-streams.asd branches/hans/input.lisp branches/hans/stream.lisp branches/hans/strings.lisp Log: Speed up string-to-octets by shortcutting through the streams mechanic.
Modified: branches/hans/flexi-streams.asd ============================================================================== --- branches/hans/flexi-streams.asd (original) +++ branches/hans/flexi-streams.asd Thu May 1 09:41:05 2008 @@ -49,8 +49,8 @@ (:file "stream") #+:lispworks (:file "lw-binary-stream") (:file "output") - (:file "input") - (:file "strings")) + (:file "strings") + (:file "input")) :depends-on (:trivial-gray-streams))
(defsystem :flexi-streams-test
Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Thu May 1 09:41:05 2008 @@ -43,8 +43,8 @@ (octet-stack flexi-stream-octet-stack) (s flexi-stream-stream)) flexi-input-stream - (declare (integer position) - (type (or null integer) bound)) + (declare (fixnum position) + (type (or null fixnum) bound)) (when (and bound (>= position bound)) (return-from read-byte* nil)) @@ -290,9 +290,6 @@ (defmethod stream-read-char ((,stream-var ,stream-class)) "This method was generated with the DEFINE-CHAR-READER macro." (declare (optimize speed)) - ;; note that we do nothing for the :LF EOL style because we - ;; assume that #\Newline is the same as #\Linefeed in all - ;; Lisps which will use this library (with-accessors ((last-octet flexi-stream-last-octet) (last-char-code flexi-stream-last-char-code)) ,stream-var @@ -507,6 +504,9 @@ stream (when (eql char #\Return) (case (external-format-eol-style external-format) + ;; note that we do nothing for the :LF EOL style because we + ;; assume that #\Newline is the same as #\Linefeed in all + ;; Lisps which will use this library (:cr (setq char #\Newline last-char-code #.(char-code #\Newline))) ;; in the case :CRLF we have to look ahead one character @@ -627,3 +627,14 @@ finally (unless (eql octet eof-value) (unread-byte octet flexi-input-stream)) (return octet))) + +(defun test-buffer-code-char () + (let* ((vector (make-array 2 :element-type '(unsigned-byte 8) :initial-element (char-code #\F))) + (buffer (make-to-string-conversion-buffer :vector vector + :position 0 + :end 2 + :eol-style :nl)) + (dummy-stream (make-flexi-stream (make-string-input-stream "") :external-format (make-external-format :ascii)))) + (dotimes (i 1000000) + (null (buffer-read-char buffer dummy-stream)) + (setf (tscb-position buffer) 0)))) \ No newline at end of file
Modified: branches/hans/stream.lisp ============================================================================== --- branches/hans/stream.lisp (original) +++ branches/hans/stream.lisp Thu May 1 09:41:05 2008 @@ -170,6 +170,12 @@ MAKE-INSTANCE to create a new FLEXI-OUTPUT-STREAM but use MAKE-FLEXI-STREAM instead."))
+(defgeneric flexi-stream-output-size-factor (stream) + (:documentation "The factor to determine the size of the output +buffer when converting strings to octets for this format. The size of +the buffer allocated will be the number of characters in the string to +convert multiplied by this factor.")) + #+:cmu (defmethod input-stream-p ((stream flexi-output-stream)) "Explicitly states whether this is an input stream." @@ -197,7 +203,7 @@ look ahead for a CR/LF line ending.") (position :initform 0 :initarg :position - :type integer + :type fixnum :accessor flexi-stream-position :documentation "The position within the stream where each octet read counts as one.") @@ -327,6 +333,9 @@ (:documentation "The class for all flexi output streams which use an 8-bit encoding."))
+(defmethod flexi-stream-output-size-factor ((stream flexi-8-bit-output-stream)) + 1) + (defclass flexi-cr-8-bit-output-stream (flexi-cr-mixin flexi-8-bit-output-stream) () (:documentation "The class for all flexi output streams which @@ -357,6 +366,9 @@ (:documentation "Special class for flexi output streams which use the UTF-32 encoding with little-endian byte ordering."))
+(defmethod flexi-stream-output-size-factor ((stream flexi-utf-32-le-output-stream)) + 4) + (defclass flexi-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-utf-32-le-output-stream) () (:documentation "Special class for flexi output streams which @@ -368,6 +380,9 @@ (:documentation "Special class for flexi output streams which use the UTF-32 encoding with big-endian byte ordering."))
+(defmethod flexi-stream-output-size-factor ((stream flexi-utf-32-be-output-stream)) + 4) + (defclass flexi-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-utf-32-be-output-stream) () (:documentation "Special class for flexi output streams which @@ -379,6 +394,9 @@ (:documentation "Special class for flexi output streams which use the UTF-16 encoding with little-endian byte ordering."))
+(defmethod flexi-stream-output-size-factor ((stream flexi-utf-16-le-output-stream)) + 2) + (defclass flexi-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-utf-16-le-output-stream) () (:documentation "Special class for flexi output streams which @@ -390,6 +408,9 @@ (:documentation "Special class for flexi output streams which use the UTF-16 encoding with big-endian byte ordering."))
+(defmethod flexi-stream-output-size-factor ((stream flexi-utf-16-be-output-stream)) + 2) + (defclass flexi-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-utf-16-be-output-stream) () (:documentation "Special class for flexi output streams which @@ -401,6 +422,9 @@ (:documentation "Special class for flexi output streams which use the UTF-8 encoding."))
+(defmethod flexi-stream-output-size-factor ((stream flexi-utf-8-output-stream)) + 1.25) + (defclass flexi-cr-utf-8-output-stream (flexi-cr-mixin flexi-utf-8-output-stream) () (:documentation "Special class for flexi output streams which
Modified: branches/hans/strings.lisp ============================================================================== --- branches/hans/strings.lisp (original) +++ branches/hans/strings.lisp Thu May 1 09:41:05 2008 @@ -29,11 +29,31 @@
(in-package :flexi-streams)
+(defmethod write-byte* (byte (array array)) + (vector-push-extend byte array)) + (defun string-to-octets (string &key (external-format (make-external-format :latin1)) - (start 0) end) + (start 0) (end (length string))) "Converts the Lisp string STRING from START to END to an array of octets corresponding to the external format EXTERNAL-FORMAT." (declare (optimize speed)) + (declare (type (array character (*)) string)) + (declare (fixnum start end)) + (let* ((dummy-stream (make-flexi-stream (make-broadcast-stream) :external-format external-format)) + (octets (make-array (truncate (* (float (- end start)) (flexi-stream-output-size-factor dummy-stream))) + :adjustable t :fill-pointer 0 :element-type '(unsigned-byte 8)))) + (loop + for i of-type fixnum from start below end + do (char-to-octets dummy-stream (aref string i) octets)) + octets)) + +(defun string-to-octets* (string &key (external-format (make-external-format :latin1)) + (start 0) end) + "Converts the Lisp string STRING from START to END to an array of +octets corresponding to the external format EXTERNAL-FORMAT. This +version of STRING-TO-OCTETS is kept around for performance +comparisons." + (declare (optimize speed)) (with-output-to-sequence (out) (let ((flexi (make-flexi-stream out :external-format external-format))) (write-string string flexi :start start :end end)))) @@ -83,6 +103,8 @@ ;; This version of OCTETS-TO-STRING is here so that one can do speed ;; comparisons. It should be significantly slower than the version ;; above. + (declare (type (simple-array (unsigned-byte 8) *) vector)) + (declare (type (integer 0 *) start end)) (declare (optimize speed)) (with-input-from-sequence (in vector :start start :end end) (let ((flexi (make-flexi-stream in :external-format external-format))
Added: branches/hans/test-speed.lisp ============================================================================== --- (empty file) +++ branches/hans/test-speed.lisp Thu May 1 09:41:05 2008 @@ -0,0 +1,92 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.51 2007/12/29 22:58:43 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defmacro without-gcing (&body body) + `(#+openmcl ccl::without-gcing + #+sbcl sb-sys:without-gcing + #-(or openmcl sbcl) + progn + ,@body)) + +(defun test-speed () + (without-gcing + (let* ((character-count 10000) + (octets (make-array character-count :element-type '(unsigned-byte 8)))) + (dotimes (i character-count) + (setf (aref octets i) (+ 32 (random 96)))) + (format t "testing with latin-1 encoding, streams based~%") + (time (dotimes (i 10) + (null (octets-to-string* octets :external-format (make-external-format :latin-1))))) + (format t "testing with utf-8 encoding, streams based~%") + (time (dotimes (i 10) + (null (octets-to-string* octets :external-format (make-external-format :utf-8))))) + (format t "testing with latin-1 encoding, optimized~%") + (time (dotimes (i 10) + (null (octets-to-string octets :external-format (make-external-format :latin-1))))) + (format t "testing with utf-8 encoding, optimized~%") + (time (dotimes (i 10) + (null (octets-to-string octets :external-format (make-external-format :utf-8)))))))) + +(defmacro profile (&body body) + #+sbcl + `(progn + (sb-profile:reset) + (progn + ,@body) + (sb-profile:report))) + + +(defun profile-speed () + #+sbcl + (sb-profile:profile "FLEX") + (without-gcing + (let* ((character-count 1000) + (octets (make-array character-count :element-type '(unsigned-byte 8)))) + (dotimes (i character-count) + (setf (aref octets i) (+ 32 (random 96)))) + (format t "profiling with latin-1 encoding, streams based~%") + (profile (dotimes (i 10) + (null (octets-to-string* octets :external-format (make-external-format :latin-1))))) + (format t "profiling with utf-8 encoding, streams based~%") + (profile (dotimes (i 10) + (null (octets-to-string* octets :external-format (make-external-format :utf-8))))) + (format t "profiling with latin-1 encoding, optimized~%") + (profile (dotimes (i 10) + (null (octets-to-string octets :external-format (make-external-format :latin-1))))) + (format t "profiling with utf-8 encoding, optimized~%") + (profile (dotimes (i 10) + (null (octets-to-string octets :external-format (make-external-format :utf-8)))))))) + +(defun fixnum-or-nil (i) + (and (oddp i) #.(char-code #\f))) + +(defun fixnum-and-nil (i) + (values #.(char-code #\f) (oddp i))) \ No newline at end of file
flexi-streams-cvs@common-lisp.net