Raymond Toy pushed to branch issue-367-count-octets-for-encoding at cmucl / cmucl
Commits:
-
31a76ff7
by Raymond Toy at 2025-01-12T14:08:01-08:00
3 changed files:
- src/pcl/simple-streams/external-formats/utf-16.lisp
- src/pcl/simple-streams/external-formats/utf-32.lisp
- tests/external-formats.lisp
Changes:
| ... | ... | @@ -158,16 +158,16 @@ Unicode replacement character.") |
| 158 | 158 | ;; The state is a list. Copy it.
|
| 159 | 159 | `(copy-list ,state))
|
| 160 | 160 | (octet-count (code state error)
|
| 161 | - `(progn
|
|
| 162 | - #+nil
|
|
| 161 | + `(let ((bom-count 0))
|
|
| 163 | 162 | (unless ,state
|
| 164 | 163 | ;; Output BOM
|
| 165 | - (output #xFEFF)
|
|
| 164 | + (setf bom-count 2)
|
|
| 166 | 165 | (setf ,state t))
|
| 167 | - (cond ((< ,code #x10000)
|
|
| 168 | - 2)
|
|
| 169 | - ((< ,code #x110000)
|
|
| 170 | - 4)
|
|
| 171 | - (t
|
|
| 172 | - ;; Replacement character is 2 octets
|
|
| 173 | - 2))))) |
|
| 166 | + (+ bom-count
|
|
| 167 | + (cond ((< ,code #x10000)
|
|
| 168 | + 2)
|
|
| 169 | + ((< ,code #x110000)
|
|
| 170 | + 4)
|
|
| 171 | + (t
|
|
| 172 | + ;; Replacement character is 2 octets
|
|
| 173 | + 2)))))) |
| ... | ... | @@ -116,11 +116,9 @@ Unicode replacement character.") |
| 116 | 116 | ;; The state is either NIL or T, so we can just return that.
|
| 117 | 117 | `(progn ,state))
|
| 118 | 118 | (octet-count (code state error)
|
| 119 | - `(progn
|
|
| 120 | - ;; Should we count the BOM?
|
|
| 121 | - #+nil
|
|
| 119 | + `(let ((bom-count 0))
|
|
| 122 | 120 | (unless ,state
|
| 123 | - (out #xFEFF)
|
|
| 121 | + (setf bom-count 4)
|
|
| 124 | 122 | (setf ,state t))
|
| 125 | 123 | (cond ((lisp::surrogatep ,code)
|
| 126 | 124 | (if ,error
|
| ... | ... | @@ -130,6 +128,6 @@ Unicode replacement character.") |
| 130 | 128 | (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
|
| 131 | 129 | ,code))
|
| 132 | 130 | ;; Replacement character is 2 octets
|
| 133 | - 2))
|
|
| 131 | + (+ 2 bom-count)))
|
|
| 134 | 132 | (t
|
| 135 | - 4))))) |
|
| 133 | + (+ 4 bom-count)))))) |
| ... | ... | @@ -36,7 +36,6 @@ |
| 36 | 36 | (:tag :octet-count)
|
| 37 | 37 | (test-octet-count *test-unicode* :utf-8))
|
| 38 | 38 | |
| 39 | -#+nil
|
|
| 40 | 39 | (define-test octet-count.utf-16
|
| 41 | 40 | (:tag :octet-count)
|
| 42 | 41 | (test-octet-count *test-unicode* :utf-16))
|
| ... | ... | @@ -49,7 +48,6 @@ |
| 49 | 48 | (:tag :octet-count)
|
| 50 | 49 | (test-octet-count *test-unicode* :utf-16-le))
|
| 51 | 50 | |
| 52 | -#+nil
|
|
| 53 | 51 | (define-test octet-count.utf-32
|
| 54 | 52 | (:tag :octet-count)
|
| 55 | 53 | (test-octet-count *test-unicode* :utf-32))
|