Raymond Toy pushed to branch master at cmucl / cmucl
Commits: a8c27cfa by Raymond Toy at 2016-08-21T14:38:34-07:00 Fix #25: Handle unicode strings more consistently.
When writing a string to the program too few octets were written because strings are now 16-bits wide. To fix this, only write the low 8-bits of each character. This matches what reading does.
This pretty much implies that the caller should use STREAM:STRING-ENCODE and STREAM::STRING-DECODE on the strings.
Add several tests to verify the expected results.
- - - - - 5beb6431 by Raymond Toy at 2016-08-25T02:26:43+00:00 Merge branch 'rtoy-fix-issue-25' into 'master'
Fix #25: Handle unicode strings more consistently.
When writing a string to the program too few octets were written because strings are now 16-bits wide. To fix this, only write the low 8-bits of each character. This matches what reading does.
This pretty much implies that the caller should use STREAM:STRING-ENCODE and STREAM::STRING-DECODE on the strings.
Add several tests to verify the expected results.
See merge request !8 - - - - -
2 changed files:
- src/code/run-program.lisp - tests/issues.lisp
Changes:
===================================== src/code/run-program.lisp ===================================== --- a/src/code/run-program.lisp +++ b/src/code/run-program.lisp @@ -749,7 +749,13 @@ (read-line object nil nil) (unless line (return)) - (unix:unix-write fd line 0 (length line)) + ;; Take just the low 8 bits of each char + ;; (code) of the string and write that out to + ;; the descriptor. + (let ((output (make-array (length line) :element-type '(unsigned-byte 8)))) + (dotimes (k (length output)) + (setf (aref output k) (ldb (byte 8 0) (char-code (aref line k))))) + (unix:unix-write fd output 0 (length output))) (if no-cr (return) (unix:unix-write fd newline 0 1)))))
===================================== tests/issues.lisp ===================================== --- a/tests/issues.lisp +++ b/tests/issues.lisp @@ -210,3 +210,91 @@ (assert-eql 3d0 (funcall tester 3d0)) (assert-eql 4w0 (funcall tester 4w0))))
+(define-test issue.25a + (:tag :issues) + ;; The original test from issue 25, modified slightly for lisp-unit + ;; testing. + (let* ((in-string (format nil "A line.~%And another.~%"))) + (with-output-to-string (out-stream nil) + (with-input-from-string (in-stream in-string) + (ext:run-program "cat" nil + :wait t + :input in-stream + :output out-stream)) + (let ((out-string (get-output-stream-string out-stream))) + (assert-eql (length in-string) (length out-string)) + (assert-equal in-string out-string))))) + +(define-test issue.25b + (:tag :issues) + ;; Modified test to verify that we only write the low 8-bits of each + ;; string character to run-program. + (let* ((in-string (concatenate 'string '(#\greek_small_letter_alpha + #\greek_small_letter_beta))) + (expected (map 'string #'(lambda (c) + (code-char (ldb (byte 8 0) (char-code c)))) + in-string))) + (with-output-to-string (out-stream nil) + (with-input-from-string (in-stream in-string) + (ext:run-program "cat" nil + :wait t + :input in-stream + :output out-stream)) + (let ((out-string (get-output-stream-string out-stream))) + (assert-eql (length out-string) (length out-string)) + ;; For comparison, convert the strings to codes so failures are easier to read + (assert-equal (map 'list #'char-code out-string) + (map 'list #'char-code expected)))))) + +(define-test issue.25c + (:tag :issues) + ;; Modified test to verify that each octet read from run-program is + ;; read into the low 8-bits of each character of the resulting + ;; string. + (let* ((in-string (concatenate 'string '(#\greek_small_letter_alpha + #\greek_small_letter_beta))) + (expected (stream:string-encode in-string :utf16-be)) + (path #p"issue25c.txt")) + (with-open-file (s path :direction :output :if-exists :supersede :external-format :utf16-be) + (write-string in-string s) + (force-output s) + (file-position s 0) + (with-open-file (s1 path :direction :input :element-type '(unsigned-byte 8)) + (with-output-to-string (out-stream) + (ext:run-program "cat" nil + :wait t + :input s1 + :output out-stream) + (let ((out-string (get-output-stream-string out-stream))) + (assert-equal (length out-string) (length expected)) + (assert-equal (map 'list #'char-code out-string) + (map 'list #'char-code expected)))))))) + + +(define-test issue.25d + (:tag :issues) + ;; The original test from issue 25, but using non-ascii characters + ;; and using string-encode/decode to verify that the output and the + ;; input match. + (let* ((in-string (concatenate 'string '(#\greek_small_letter_alpha + #\greek_small_letter_beta + #\greek_small_letter_gamma + #\greek_small_letter_delta + #\greek_small_letter_epsilon + #\greek_small_letter_zeta + #\greek_small_letter_eta + #\greek_small_letter_theta + #\greek_small_letter_iota + #\greek_small_letter_kappa + #\greek_small_letter_lamda)))) + (with-output-to-string (out-stream nil) + (with-input-from-string (in-stream (stream:string-encode in-string :utf8)) + (ext:run-program "cat" nil + :wait t + :input in-stream + :output out-stream)) + (let ((out-string (stream:string-decode (get-output-stream-string out-stream) + :utf8))) + (assert-eql (length in-string) (length out-string)) + (assert-equal in-string out-string))))) +
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/compare/774abc703a5c0a287d528f069...