Raymond Toy pushed to branch issue-367-count-octets-for-encoding at cmucl / cmucl
Commits:
-
f5b4ae8a
by Raymond Toy at 2025-01-12T11:39:37-08:00
-
83db856a
by Raymond Toy at 2025-01-12T11:49:48-08:00
9 changed files:
- src/pcl/simple-streams/external-formats/ascii.lisp
- src/pcl/simple-streams/external-formats/iso8859-1.lisp
- src/pcl/simple-streams/external-formats/utf-16-be.lisp
- src/pcl/simple-streams/external-formats/utf-16-le.lisp
- src/pcl/simple-streams/external-formats/utf-16.lisp
- src/pcl/simple-streams/external-formats/utf-32-be.lisp
- src/pcl/simple-streams/external-formats/utf-32-le.lisp
- src/pcl/simple-streams/external-formats/utf-32.lisp
- src/pcl/simple-streams/external-formats/utf-8.lisp
Changes:
... | ... | @@ -33,5 +33,15 @@ replaced with a question mark.") |
33 | 33 | (declare (optimize (ext:inhibit-warnings 3)))
|
34 | 34 | (funcall ,error "Cannot output codepoint #x~X to ASCII stream" ,code))
|
35 | 35 | #x3F)
|
36 | - ,code))))
|
|
36 | + ,code)))
|
|
37 | + ()
|
|
38 | + ()
|
|
39 | + (octet-count (code state error)
|
|
40 | + `(if (> ,code #x7f)
|
|
41 | + (if ,error
|
|
42 | + (locally
|
|
43 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
44 | + (funcall ,error "Cannot output codepoint #x~X to ASCII stream" ,code))
|
|
45 | + 1)
|
|
46 | + 1)))
|
|
37 | 47 |
... | ... | @@ -31,4 +31,17 @@ character and illegal outputs are replaced by a question mark.") |
31 | 31 | (funcall ,error "Cannot output codepoint #x~X to ISO8859-1 stream"
|
32 | 32 | ,code 1))
|
33 | 33 | #x3F)
|
34 | - ,code)))) |
|
34 | + ,code)))
|
|
35 | + ()
|
|
36 | + ()
|
|
37 | + (octet-count (code state error)
|
|
38 | + `(if (> ,code 255)
|
|
39 | + (if ,error
|
|
40 | + (locally
|
|
41 | + ;; No warnings about fdefinition
|
|
42 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
43 | + (funcall ,error
|
|
44 | + (intl:gettext "Cannot output codepoint #x~X to ISO8859-1 stream")
|
|
45 | + ,code 1))
|
|
46 | + 1)
|
|
47 | + 1))) |
... | ... | @@ -110,4 +110,12 @@ Unicode replacement character.") |
110 | 110 | (copy-state (state)
|
111 | 111 | ;; The state is either NIL or a codepoint, so nothing really
|
112 | 112 | ;; special is needed to copy it.
|
113 | - `(progn ,state))) |
|
113 | + `(progn ,state))
|
|
114 | + (code-to-octets (code state error)
|
|
115 | + `(cond ((< ,code #x10000)
|
|
116 | + 2)
|
|
117 | + ((< ,code #x110000)
|
|
118 | + 4)
|
|
119 | + (t
|
|
120 | + ;; Replacement character is 2 octets
|
|
121 | + 2)))) |
... | ... | @@ -111,4 +111,12 @@ Unicode replacement character.") |
111 | 111 | (copy-state (state)
|
112 | 112 | ;; The state is either NIL or a codepoint, so nothing really
|
113 | 113 | ;; special is needed.
|
114 | - `(progn ,state))) |
|
114 | + `(progn ,state))
|
|
115 | + (code-to-octets (code state error)
|
|
116 | + `(cond ((< ,code #x10000)
|
|
117 | + 2)
|
|
118 | + ((< ,code #x110000)
|
|
119 | + 4)
|
|
120 | + (t
|
|
121 | + ;; Replacement character is 2 octets
|
|
122 | + 2)))) |
... | ... | @@ -156,4 +156,18 @@ Unicode replacement character.") |
156 | 156 | ,c))))))
|
157 | 157 | (copy-state (state)
|
158 | 158 | ;; The state is list. Copy it
|
159 | - `(copy-list ,state))) |
|
159 | + `(copy-list ,state))
|
|
160 | + (code-to-octets (code state error)
|
|
161 | + `(progn
|
|
162 | + #+nil
|
|
163 | + (unless ,state
|
|
164 | + ;; Output BOM
|
|
165 | + (output #xFEFF)
|
|
166 | + (setf ,state t))
|
|
167 | + (cond ((< ,code #x10000)
|
|
168 | + 2)
|
|
169 | + ((< ,code #x110000)
|
|
170 | + 4)
|
|
171 | + (t
|
|
172 | + ;; Replacement character is 2 octets
|
|
173 | + 2))))) |
... | ... | @@ -61,4 +61,18 @@ Unicode replacement character.") |
61 | 61 | ,code))
|
62 | 62 | +replacement-character-code+)))
|
63 | 63 | (t
|
64 | - (out ,code)))))) |
|
64 | + (out ,code)))))
|
|
65 | + ()
|
|
66 | + ()
|
|
67 | + (code-to-octets (code state error)
|
|
68 | + `(cond ((lisp::surrogatep ,code)
|
|
69 | + (if ,error
|
|
70 | + (locally
|
|
71 | + ;; No warnings about fdefinition
|
|
72 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
73 | + (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
|
|
74 | + ,code))
|
|
75 | + ;; Replacement character is 2 octets
|
|
76 | + 2))
|
|
77 | + (t
|
|
78 | + 4))))) |
... | ... | @@ -62,4 +62,18 @@ Unicode replacement character.") |
62 | 62 | ,code))
|
63 | 63 | +replacement-character-code+)))
|
64 | 64 | (t
|
65 | - (out ,code)))))) |
|
65 | + (out ,code)))))
|
|
66 | + ()
|
|
67 | + ()
|
|
68 | + (code-to-octets (code state error)
|
|
69 | + `(cond ((lisp::surrogatep ,code)
|
|
70 | + (if ,error
|
|
71 | + (locally
|
|
72 | + ;; No warnings about fdefinition
|
|
73 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
74 | + (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
|
|
75 | + ,code))
|
|
76 | + ;; Replacement character is 2 octets
|
|
77 | + 2))
|
|
78 | + (t
|
|
79 | + 4)))) |
... | ... | @@ -114,4 +114,22 @@ Unicode replacement character.") |
114 | 114 | nil
|
115 | 115 | (copy-state (state)
|
116 | 116 | ;; The state is either NIL or T, so we can just return that.
|
117 | - `(progn ,state))) |
|
117 | + `(progn ,state))
|
|
118 | + (code-to-octets (code state output error i c)
|
|
119 | + `(progn
|
|
120 | + ;; Should we count the BOM?
|
|
121 | + #+nil
|
|
122 | + (unless ,state
|
|
123 | + (out #xFEFF)
|
|
124 | + (setf ,state t))
|
|
125 | + (cond ((lisp::surrogatep ,code)
|
|
126 | + (if ,error
|
|
127 | + (locally
|
|
128 | + ;; No warnings about fdefinition
|
|
129 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
130 | + (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
|
|
131 | + ,code))
|
|
132 | + ;; Replacement character is 2 octets
|
|
133 | + 2))
|
|
134 | + (t
|
|
135 | + 4))))) |
... | ... | @@ -127,4 +127,14 @@ replacement character.") |
127 | 127 | ((< ,code #x800) (utf8 ,code 1))
|
128 | 128 | ((< ,code #x10000) (utf8 ,code 2))
|
129 | 129 | ((< ,code #x110000) (utf8 ,code 3))
|
130 | - (t (error "How did this happen? Codepoint U+~X is illegal" ,code)))))) |
|
130 | + (t (error "How did this happen? Codepoint U+~X is illegal" ,code)))))
|
|
131 | + ()
|
|
132 | + ()
|
|
133 | + (octet-count (code state error)
|
|
134 | + `(locally
|
|
135 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
136 | + (cond ((< ,code #x80) 1)
|
|
137 | + ((< ,code #x800) 2)
|
|
138 | + ((< ,code #x10000) 3)
|
|
139 | + ((< ,code #x110000) 4)
|
|
140 | + (t (error "How did this happen? Codepoint U+~X is illegal" ,code)))))) |