Raymond Toy pushed to branch issue-367-count-octets-for-encoding at cmucl / cmucl
Commits:
-
f5b4ae8a
by Raymond Toy at 2025-01-12T11:39:37-08:00
-
83db856a
by Raymond Toy at 2025-01-12T11:49:48-08:00
9 changed files:
- src/pcl/simple-streams/external-formats/ascii.lisp
- src/pcl/simple-streams/external-formats/iso8859-1.lisp
- src/pcl/simple-streams/external-formats/utf-16-be.lisp
- src/pcl/simple-streams/external-formats/utf-16-le.lisp
- src/pcl/simple-streams/external-formats/utf-16.lisp
- src/pcl/simple-streams/external-formats/utf-32-be.lisp
- src/pcl/simple-streams/external-formats/utf-32-le.lisp
- src/pcl/simple-streams/external-formats/utf-32.lisp
- src/pcl/simple-streams/external-formats/utf-8.lisp
Changes:
| ... | ... | @@ -33,5 +33,15 @@ replaced with a question mark.") |
| 33 | 33 | (declare (optimize (ext:inhibit-warnings 3)))
|
| 34 | 34 | (funcall ,error "Cannot output codepoint #x~X to ASCII stream" ,code))
|
| 35 | 35 | #x3F)
|
| 36 | - ,code))))
|
|
| 36 | + ,code)))
|
|
| 37 | + ()
|
|
| 38 | + ()
|
|
| 39 | + (octet-count (code state error)
|
|
| 40 | + `(if (> ,code #x7f)
|
|
| 41 | + (if ,error
|
|
| 42 | + (locally
|
|
| 43 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
| 44 | + (funcall ,error "Cannot output codepoint #x~X to ASCII stream" ,code))
|
|
| 45 | + 1)
|
|
| 46 | + 1)))
|
|
| 37 | 47 |
| ... | ... | @@ -31,4 +31,17 @@ character and illegal outputs are replaced by a question mark.") |
| 31 | 31 | (funcall ,error "Cannot output codepoint #x~X to ISO8859-1 stream"
|
| 32 | 32 | ,code 1))
|
| 33 | 33 | #x3F)
|
| 34 | - ,code)))) |
|
| 34 | + ,code)))
|
|
| 35 | + ()
|
|
| 36 | + ()
|
|
| 37 | + (octet-count (code state error)
|
|
| 38 | + `(if (> ,code 255)
|
|
| 39 | + (if ,error
|
|
| 40 | + (locally
|
|
| 41 | + ;; No warnings about fdefinition
|
|
| 42 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
| 43 | + (funcall ,error
|
|
| 44 | + (intl:gettext "Cannot output codepoint #x~X to ISO8859-1 stream")
|
|
| 45 | + ,code 1))
|
|
| 46 | + 1)
|
|
| 47 | + 1))) |
| ... | ... | @@ -110,4 +110,12 @@ Unicode replacement character.") |
| 110 | 110 | (copy-state (state)
|
| 111 | 111 | ;; The state is either NIL or a codepoint, so nothing really
|
| 112 | 112 | ;; special is needed to copy it.
|
| 113 | - `(progn ,state))) |
|
| 113 | + `(progn ,state))
|
|
| 114 | + (code-to-octets (code state error)
|
|
| 115 | + `(cond ((< ,code #x10000)
|
|
| 116 | + 2)
|
|
| 117 | + ((< ,code #x110000)
|
|
| 118 | + 4)
|
|
| 119 | + (t
|
|
| 120 | + ;; Replacement character is 2 octets
|
|
| 121 | + 2)))) |
| ... | ... | @@ -111,4 +111,12 @@ Unicode replacement character.") |
| 111 | 111 | (copy-state (state)
|
| 112 | 112 | ;; The state is either NIL or a codepoint, so nothing really
|
| 113 | 113 | ;; special is needed.
|
| 114 | - `(progn ,state))) |
|
| 114 | + `(progn ,state))
|
|
| 115 | + (code-to-octets (code state error)
|
|
| 116 | + `(cond ((< ,code #x10000)
|
|
| 117 | + 2)
|
|
| 118 | + ((< ,code #x110000)
|
|
| 119 | + 4)
|
|
| 120 | + (t
|
|
| 121 | + ;; Replacement character is 2 octets
|
|
| 122 | + 2)))) |
| ... | ... | @@ -156,4 +156,18 @@ Unicode replacement character.") |
| 156 | 156 | ,c))))))
|
| 157 | 157 | (copy-state (state)
|
| 158 | 158 | ;; The state is list. Copy it
|
| 159 | - `(copy-list ,state))) |
|
| 159 | + `(copy-list ,state))
|
|
| 160 | + (code-to-octets (code state error)
|
|
| 161 | + `(progn
|
|
| 162 | + #+nil
|
|
| 163 | + (unless ,state
|
|
| 164 | + ;; Output BOM
|
|
| 165 | + (output #xFEFF)
|
|
| 166 | + (setf ,state t))
|
|
| 167 | + (cond ((< ,code #x10000)
|
|
| 168 | + 2)
|
|
| 169 | + ((< ,code #x110000)
|
|
| 170 | + 4)
|
|
| 171 | + (t
|
|
| 172 | + ;; Replacement character is 2 octets
|
|
| 173 | + 2))))) |
| ... | ... | @@ -61,4 +61,18 @@ Unicode replacement character.") |
| 61 | 61 | ,code))
|
| 62 | 62 | +replacement-character-code+)))
|
| 63 | 63 | (t
|
| 64 | - (out ,code)))))) |
|
| 64 | + (out ,code)))))
|
|
| 65 | + ()
|
|
| 66 | + ()
|
|
| 67 | + (code-to-octets (code state error)
|
|
| 68 | + `(cond ((lisp::surrogatep ,code)
|
|
| 69 | + (if ,error
|
|
| 70 | + (locally
|
|
| 71 | + ;; No warnings about fdefinition
|
|
| 72 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
| 73 | + (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
|
|
| 74 | + ,code))
|
|
| 75 | + ;; Replacement character is 2 octets
|
|
| 76 | + 2))
|
|
| 77 | + (t
|
|
| 78 | + 4))))) |
| ... | ... | @@ -62,4 +62,18 @@ Unicode replacement character.") |
| 62 | 62 | ,code))
|
| 63 | 63 | +replacement-character-code+)))
|
| 64 | 64 | (t
|
| 65 | - (out ,code)))))) |
|
| 65 | + (out ,code)))))
|
|
| 66 | + ()
|
|
| 67 | + ()
|
|
| 68 | + (code-to-octets (code state error)
|
|
| 69 | + `(cond ((lisp::surrogatep ,code)
|
|
| 70 | + (if ,error
|
|
| 71 | + (locally
|
|
| 72 | + ;; No warnings about fdefinition
|
|
| 73 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
| 74 | + (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
|
|
| 75 | + ,code))
|
|
| 76 | + ;; Replacement character is 2 octets
|
|
| 77 | + 2))
|
|
| 78 | + (t
|
|
| 79 | + 4)))) |
| ... | ... | @@ -114,4 +114,22 @@ Unicode replacement character.") |
| 114 | 114 | nil
|
| 115 | 115 | (copy-state (state)
|
| 116 | 116 | ;; The state is either NIL or T, so we can just return that.
|
| 117 | - `(progn ,state))) |
|
| 117 | + `(progn ,state))
|
|
| 118 | + (code-to-octets (code state output error i c)
|
|
| 119 | + `(progn
|
|
| 120 | + ;; Should we count the BOM?
|
|
| 121 | + #+nil
|
|
| 122 | + (unless ,state
|
|
| 123 | + (out #xFEFF)
|
|
| 124 | + (setf ,state t))
|
|
| 125 | + (cond ((lisp::surrogatep ,code)
|
|
| 126 | + (if ,error
|
|
| 127 | + (locally
|
|
| 128 | + ;; No warnings about fdefinition
|
|
| 129 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
| 130 | + (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
|
|
| 131 | + ,code))
|
|
| 132 | + ;; Replacement character is 2 octets
|
|
| 133 | + 2))
|
|
| 134 | + (t
|
|
| 135 | + 4))))) |
| ... | ... | @@ -127,4 +127,14 @@ replacement character.") |
| 127 | 127 | ((< ,code #x800) (utf8 ,code 1))
|
| 128 | 128 | ((< ,code #x10000) (utf8 ,code 2))
|
| 129 | 129 | ((< ,code #x110000) (utf8 ,code 3))
|
| 130 | - (t (error "How did this happen? Codepoint U+~X is illegal" ,code)))))) |
|
| 130 | + (t (error "How did this happen? Codepoint U+~X is illegal" ,code)))))
|
|
| 131 | + ()
|
|
| 132 | + ()
|
|
| 133 | + (octet-count (code state error)
|
|
| 134 | + `(locally
|
|
| 135 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
| 136 | + (cond ((< ,code #x80) 1)
|
|
| 137 | + ((< ,code #x800) 2)
|
|
| 138 | + ((< ,code #x10000) 3)
|
|
| 139 | + ((< ,code #x110000) 4)
|
|
| 140 | + (t (error "How did this happen? Codepoint U+~X is illegal" ,code)))))) |