Author: hhubner Date: Thu May 1 11:49:13 2008 New Revision: 8
Modified: branches/hans/input.lisp branches/hans/output.lisp branches/hans/strings.lisp branches/hans/test-speed.lisp Log: Fast and unhygienic version of OCTETS-TO-STRING. This gives a 10x speedup compared to the streams-based implementation.
Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Thu May 1 11:49:13 2008 @@ -242,35 +242,35 @@ (decf position) (push #.(char-code #\Return) octet-stack)))))
-(defun buffer-code-char (buffer char-code) - "Given a BUFFER, which is assumed to be a - to-string-conversion-buffer (see strings.lisp) and a character - code, convert to a character and perform newline processing for the - stream if the character is a #\Return. This code basically repeats - what the stream-read-char ((stream flexi-cr-mixin)) does, but it - does so in an optimized manner to make octet->string conversion - faster." +(defun code-char-with-newline-processing (char-code eol-style read-char-code-fn unread-char-code-fn) + "Perform newline conversion during octets-to-string processing. +CHAR-CODE is the code of the current character. If it denotes a +#\Return character, newline processing accoring to EOL-STYLE is +performed. READ-CHAR-CODE-FN and UNREAD-CHAR-CODE-FN are called to +read the next character code from the input, unread-char-code-fn is +called to skip back in the input by one octet. All this works under +the assumption that #\Return and #\Linefeed are single-octet codes." (declare (optimize speed (safety 0)) - (type to-string-conversion-buffer buffer) - (type fixnum char-code)) + (type fixnum char-code) + (type symbol eol-style)) (let ((char (code-char char-code))) (if (eql char #\Return) - (case (tscb-eol-style buffer) + (case eol-style (:cr #\Newline) (:crlf - (cond - ((= (tscb-position buffer) (tscb-end buffer)) + (case (funcall read-char-code-fn) + (:eof :eof) - ((eql #.(char-code #\Newline) (aref (tscb-vector buffer) (tscb-position buffer))) - (incf (tscb-position buffer)) - #\Newline) + (#.(char-code #\Newline) + #\Newline) (t + (funcall unread-char-code-fn) #\Return))) (t #\Return)) char))) -(declaim (inline buffer-code-char)) +(declaim (inline code-char-with-newline-processing))
(defmacro define-char-reader ((stream-var stream-class) &body body) "Helper macro to define methods for STREAM-READ-CHAR and @@ -284,7 +284,7 @@ used only for dispatching. The BUFFER-READ-CHAR generic function is used to shortcut through the streams mechanic from the OCTETS-TO-STRING function." - (with-unique-names (char-code body-fn dummy-stream) + (with-unique-names (char-code body-fn octets-var) (let ((body body)) `(progn (defmethod stream-read-char ((,stream-var ,stream-class)) @@ -304,13 +304,33 @@ ;; for UNREAD-CHAR (setq last-char-code ,char-code) (or (code-char ,char-code) ,char-code)))) - (defmethod buffer-read-char (,stream-var (,dummy-stream ,stream-class)) - (declare (optimize speed)) - (declare (ignore ,dummy-stream)) ; used only for dispatch - (block stream-read-char ;; for RETURN-FROM in BODY - (let ((,char-code (progn ,@body))) - (declare (type fixnum ,char-code)) - (or (buffer-code-char ,stream-var ,char-code) ,char-code)))))))) + (defmethod octets-to-string% ((,stream-var ,stream-class) ,octets-var &key start end) + (let ((position start) + save-position + (eol-style (external-format-eol-style (flexi-stream-external-format ,stream-var))) + (string (make-array (- end start) :element-type 'character :fill-pointer 0))) + (labels ((read-byte* (stream) + (declare (ignore stream)) + (if (< position end) + (prog1 + (aref ,octets-var position) + (incf position)) + :eof)) + (read-char-code () + (setf save-position position) + (block stream-read-char ;; for RETURN-FROM in BODY + ,@body)) + (unread-char-code () + (setf position save-position))) + (do ((char-code (read-char-code) (read-char-code))) + ((eql char-code :eof) + string) + (vector-push (or (code-char-with-newline-processing char-code + eol-style + #'read-char-code + #'unread-char-code) + char-code) + string)))))))))
(defun recover-from-encoding-error (flexi-stream format-control &rest format-args) "Helper function used by the STREAM-READ-CHAR methods below to deal
Modified: branches/hans/output.lisp ============================================================================== --- branches/hans/output.lisp (original) +++ branches/hans/output.lisp Thu May 1 11:49:13 2008 @@ -88,7 +88,9 @@
(defmethod char-to-octets ((stream flexi-ascii-output-stream) char sink) (declare (optimize speed)) + (declare (type character char)) (let ((octet (char-code char))) + (declare (type fixnum char-code)) (when (> octet 127) (signal-encoding-error stream "~S is not an ASCII character." char)) (write-byte* octet sink))
Modified: branches/hans/strings.lisp ============================================================================== --- branches/hans/strings.lisp (original) +++ branches/hans/strings.lisp Thu May 1 11:49:13 2008 @@ -58,24 +58,6 @@ (let ((flexi (make-flexi-stream out :external-format external-format))) (write-string string flexi :start start :end end))))
-;; TO-STRING-CONVERSION-BUFFER structures are used for fast conversion -;; of octets to strings, circumventing streams. - -(defstruct (to-string-conversion-buffer - (:conc-name tscb-)) - (vector nil :type (simple-array (unsigned-byte 8) *)) - (position nil :type fixnum) - (end nil :type fixnum) - (eol-style nil :type (or null symbol))) - -(defmethod read-byte* ((to-string-conversion-buffer to-string-conversion-buffer)) - (declare (optimize speed (safety 0))) - (let ((position (tscb-position to-string-conversion-buffer))) - (when (< position (tscb-end to-string-conversion-buffer)) - (prog1 - (aref (tscb-vector to-string-conversion-buffer) position) - (incf (tscb-position to-string-conversion-buffer)))))) - (defun octets-to-string (vector &key (external-format (make-external-format :latin1)) (start 0) (end (length vector))) "Converts the Lisp vector VECTOR of octets from START to END to @@ -83,18 +65,9 @@ (declare (optimize speed (safety 0))) (declare (type (simple-array (unsigned-byte 8) *) vector) (type fixnum start end)) - (let ((buffer (make-to-string-conversion-buffer :vector vector - :position start - :end end - :eol-style (external-format-eol-style external-format))) - (dummy-input-stream (make-flexi-stream (make-string-input-stream "") :external-format external-format)) - (string (make-array (the fixnum (- end start)) :element-type 'character :fill-pointer 0))) - (declare (type (array character (*)) string)) - (do ((char (buffer-read-char buffer dummy-input-stream) - (buffer-read-char buffer dummy-input-stream))) - ((eql char :eof) - string) - (vector-push char string)))) + (octets-to-string% (make-flexi-stream (make-string-input-stream "") :external-format external-format) + vector + :start start :end end))
(defun octets-to-string* (vector &key (external-format (make-external-format :latin1)) (start 0) (end (length vector)))
Modified: branches/hans/test-speed.lisp ============================================================================== --- branches/hans/test-speed.lisp (original) +++ branches/hans/test-speed.lisp Thu May 1 11:49:13 2008 @@ -43,16 +43,16 @@ (dotimes (i character-count) (setf (aref octets i) (+ 32 (random 96)))) (format t "testing with latin-1 encoding, streams based~%") - (time (dotimes (i 10) + (time (dotimes (i 100) (null (octets-to-string* octets :external-format (make-external-format :latin-1))))) (format t "testing with utf-8 encoding, streams based~%") - (time (dotimes (i 10) + (time (dotimes (i 100) (null (octets-to-string* octets :external-format (make-external-format :utf-8))))) (format t "testing with latin-1 encoding, optimized~%") - (time (dotimes (i 10) + (time (dotimes (i 100) (null (octets-to-string octets :external-format (make-external-format :latin-1))))) (format t "testing with utf-8 encoding, optimized~%") - (time (dotimes (i 10) + (time (dotimes (i 100) (null (octets-to-string octets :external-format (make-external-format :utf-8))))))))
(defmacro profile (&body body)