Raymond Toy pushed to branch issue-367-count-octets-for-encoding at cmucl / cmucl
Commits:
- 31a76ff7 by Raymond Toy at 2025-01-12T14:08:01-08:00
3 changed files:
- src/pcl/simple-streams/external-formats/utf-16.lisp
- src/pcl/simple-streams/external-formats/utf-32.lisp
- tests/external-formats.lisp
Changes:
... | ... | @@ -158,16 +158,16 @@ Unicode replacement character.") |
158 | 158 | ;; The state is list. Copy it
|
159 | 159 | `(copy-list ,state))
|
160 | 160 | (octet-count (code state error)
|
161 | - `(progn
|
|
162 | - #+nil
|
|
161 | + `(let ((bom-count 0))
|
|
163 | 162 | (unless ,state
|
164 | 163 | ;; Output BOM
|
165 | - (output #xFEFF)
|
|
164 | + (setf bom-count 2)
|
|
166 | 165 | (setf ,state t))
|
167 | - (cond ((< ,code #x10000)
|
|
168 | - 2)
|
|
169 | - ((< ,code #x110000)
|
|
170 | - 4)
|
|
171 | - (t
|
|
172 | - ;; Replacement character is 2 octets
|
|
173 | - 2))))) |
|
166 | + (+ bom-count
|
|
167 | + (cond ((< ,code #x10000)
|
|
168 | + 2)
|
|
169 | + ((< ,code #x110000)
|
|
170 | + 4)
|
|
171 | + (t
|
|
172 | + ;; Replacement character is 2 octets
|
|
173 | + 2)))))) |
... | ... | @@ -116,11 +116,9 @@ Unicode replacement character.") |
116 | 116 | ;; The state is either NIL or T, so we can just return that.
|
117 | 117 | `(progn ,state))
|
118 | 118 | (octet-count (code state error)
|
119 | - `(progn
|
|
120 | - ;; Should we count the BOM?
|
|
121 | - #+nil
|
|
119 | + `(let ((bom-count 0))
|
|
122 | 120 | (unless ,state
|
123 | - (out #xFEFF)
|
|
121 | + (setf bom-count 4)
|
|
124 | 122 | (setf ,state t))
|
125 | 123 | (cond ((lisp::surrogatep ,code)
|
126 | 124 | (if ,error
|
... | ... | @@ -130,6 +128,6 @@ Unicode replacement character.") |
130 | 128 | (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
|
131 | 129 | ,code))
|
132 | 130 | ;; Replacement character is 2 octets
|
133 | - 2))
|
|
131 | + (+ 2 bom-count)))
|
|
134 | 132 | (t
|
135 | - 4))))) |
|
133 | + (+ 4 bom-count)))))) |
... | ... | @@ -36,7 +36,6 @@ |
36 | 36 | (:tag :octet-count)
|
37 | 37 | (test-octet-count *test-unicode* :utf-8))
|
38 | 38 | |
39 | -#+nil
|
|
40 | 39 | (define-test octet-count.utf-16
|
41 | 40 | (:tag :octet-count)
|
42 | 41 | (test-octet-count *test-unicode* :utf-16))
|
... | ... | @@ -49,7 +48,6 @@ |
49 | 48 | (:tag :octet-count)
|
50 | 49 | (test-octet-count *test-unicode* :utf-16-le))
|
51 | 50 | |
52 | -#+nil
|
|
53 | 51 | (define-test octet-count.utf-32
|
54 | 52 | (:tag :octet-count)
|
55 | 53 | (test-octet-count *test-unicode* :utf-32))
|