[Git][cmucl/cmucl][master] 2 commits: Fix #25: Handle unicode strings more consistently.
Raymond Toy
rtoy at common-lisp.net
Thu Aug 25 02:26:47 UTC 2016
Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
a8c27cfa by Raymond Toy at 2016-08-21T14:38:34-07:00
Fix #25: Handle unicode strings more consistently.
When writing a string to the program too few octets were written
because strings are now 16-bits wide. To fix this, only write the low
8-bits of each character. This matches what reading does.
This pretty much implies that the caller should use
STREAM:STRING-ENCODE and STREAM::STRING-DECODE on the strings.
Add several tests to verify the expected results.
- - - - -
5beb6431 by Raymond Toy at 2016-08-25T02:26:43+00:00
Merge branch 'rtoy-fix-issue-25' into 'master'
Fix #25: Handle unicode strings more consistently.
When writing a string to the program too few octets were written
because strings are now 16-bits wide. To fix this, only write the low
8-bits of each character. This matches what reading does.
This pretty much implies that the caller should use
STREAM:STRING-ENCODE and STREAM::STRING-DECODE on the strings.
Add several tests to verify the expected results.
See merge request !8
- - - - -
2 changed files:
- src/code/run-program.lisp
- tests/issues.lisp
Changes:
=====================================
src/code/run-program.lisp
=====================================
--- a/src/code/run-program.lisp
+++ b/src/code/run-program.lisp
@@ -749,7 +749,13 @@
(read-line object nil nil)
(unless line
(return))
- (unix:unix-write fd line 0 (length line))
+ ;; Take just the low 8 bits of each char
+ ;; (code) of the string and write that out to
+ ;; the descriptor.
+ (let ((output (make-array (length line) :element-type '(unsigned-byte 8))))
+ (dotimes (k (length output))
+ (setf (aref output k) (ldb (byte 8 0) (char-code (aref line k)))))
+ (unix:unix-write fd output 0 (length output)))
(if no-cr
(return)
(unix:unix-write fd newline 0 1)))))
=====================================
tests/issues.lisp
=====================================
--- a/tests/issues.lisp
+++ b/tests/issues.lisp
@@ -210,3 +210,91 @@
(assert-eql 3d0 (funcall tester 3d0))
(assert-eql 4w0 (funcall tester 4w0))))
+(define-test issue.25a
+ (:tag :issues)
+ ;; The original test from issue 25, modified slightly for lisp-unit
+ ;; testing.
+ (let* ((in-string (format nil "A line.~%And another.~%")))
+ (with-output-to-string (out-stream nil)
+ (with-input-from-string (in-stream in-string)
+ (ext:run-program "cat" nil
+ :wait t
+ :input in-stream
+ :output out-stream))
+ (let ((out-string (get-output-stream-string out-stream)))
+ (assert-eql (length in-string) (length out-string))
+ (assert-equal in-string out-string)))))
+
+(define-test issue.25b
+ (:tag :issues)
+ ;; Modified test to verify that we only write the low 8-bits of each
+ ;; string character to run-program.
+ (let* ((in-string (concatenate 'string '(#\greek_small_letter_alpha
+ #\greek_small_letter_beta)))
+ (expected (map 'string #'(lambda (c)
+ (code-char (ldb (byte 8 0) (char-code c))))
+ in-string)))
+ (with-output-to-string (out-stream nil)
+ (with-input-from-string (in-stream in-string)
+ (ext:run-program "cat" nil
+ :wait t
+ :input in-stream
+ :output out-stream))
+ (let ((out-string (get-output-stream-string out-stream)))
+ (assert-eql (length out-string) (length out-string))
+ ;; For comparison, convert the strings to codes so failures are easier to read
+ (assert-equal (map 'list #'char-code out-string)
+ (map 'list #'char-code expected))))))
+
+(define-test issue.25c
+ (:tag :issues)
+ ;; Modified test to verify that each octet read from run-program is
+ ;; read into the low 8-bits of each character of the resulting
+ ;; string.
+ (let* ((in-string (concatenate 'string '(#\greek_small_letter_alpha
+ #\greek_small_letter_beta)))
+ (expected (stream:string-encode in-string :utf16-be))
+ (path #p"issue25c.txt"))
+ (with-open-file (s path :direction :output :if-exists :supersede :external-format :utf16-be)
+ (write-string in-string s)
+ (force-output s)
+ (file-position s 0)
+ (with-open-file (s1 path :direction :input :element-type '(unsigned-byte 8))
+ (with-output-to-string (out-stream)
+ (ext:run-program "cat" nil
+ :wait t
+ :input s1
+ :output out-stream)
+ (let ((out-string (get-output-stream-string out-stream)))
+ (assert-equal (length out-string) (length expected))
+ (assert-equal (map 'list #'char-code out-string)
+ (map 'list #'char-code expected))))))))
+
+
+(define-test issue.25d
+ (:tag :issues)
+ ;; The original test from issue 25, but using non-ascii characters
+ ;; and using string-encode/decode to verify that the output and the
+ ;; input match.
+ (let* ((in-string (concatenate 'string '(#\greek_small_letter_alpha
+ #\greek_small_letter_beta
+ #\greek_small_letter_gamma
+ #\greek_small_letter_delta
+ #\greek_small_letter_epsilon
+ #\greek_small_letter_zeta
+ #\greek_small_letter_eta
+ #\greek_small_letter_theta
+ #\greek_small_letter_iota
+ #\greek_small_letter_kappa
+ #\greek_small_letter_lamda))))
+ (with-output-to-string (out-stream nil)
+ (with-input-from-string (in-stream (stream:string-encode in-string :utf8))
+ (ext:run-program "cat" nil
+ :wait t
+ :input in-stream
+ :output out-stream))
+ (let ((out-string (stream:string-decode (get-output-stream-string out-stream)
+ :utf8)))
+ (assert-eql (length in-string) (length out-string))
+ (assert-equal in-string out-string)))))
+
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/compare/774abc703a5c0a287d528f069a9471ece4f253b1...5beb6431855908e9010c36bc9aeaa57b0daebcf3
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/cmucl-cvs/attachments/20160825/df6e8ac7/attachment-0001.html>
More information about the cmucl-cvs
mailing list