Raymond Toy pushed to branch issue-367-count-octets-for-encoding at cmucl / cmucl
Commits:
-
fe3f7011
by Raymond Toy at 2025-01-13T08:53:49-08:00
-
f0aea1e8
by Raymond Toy at 2025-01-13T08:54:08-08:00
-
260c3806
by Raymond Toy at 2025-01-13T12:09:11-08:00
-
f5e4262b
by Raymond Toy at 2025-01-13T12:09:41-08:00
3 changed files:
- src/pcl/simple-streams/external-formats/euc-kr.lisp
- src/pcl/simple-streams/external-formats/iso8859-2.lisp
- tests/external-formats.lisp
Changes:
File: src/pcl/simple-streams/external-formats/euc-kr.lisp
... | ... | @@ -1007,4 +1007,16 @@ character and illegal outputs are replaced by a question mark.") |
1007 | 1007 | (t
|
1008 | 1008 | (if ,error
|
1009 | 1009 | (funcall ,error "Cannot output codepoint #x~X to EUC-KR format." ,code)
|
1010 | - (,output #X3f)))))))) |
|
1010 | + (,output #X3f)))))))
|
|
1011 | + ()
|
|
1012 | + ()
|
|
1013 | + (octet-count (code state error present)
|
|
1014 | + `(if (<= ,code #x7f)
|
|
1015 | + 1
|
|
1016 | + (let ((,present (get-inverse ,itable ,code)))
|
|
1017 | + (cond (,present
|
|
1018 | + 2)
|
|
1019 | + (t
|
|
1020 | + (if ,error
|
|
1021 | + (funcall ,error "Cannot output codepoint #x~X to EUC-KR format." ,code)
|
|
1022 | + 1))))))) |
File: src/pcl/simple-streams/external-formats/iso8859-2.lisp
... | ... | @@ -47,4 +47,19 @@ character and illegal outputs are replaced by a question mark.") |
47 | 47 | (declare (optimize (ext:inhibit-warnings 3)))
|
48 | 48 | (funcall ,error "Cannot output codepoint #x~X to ISO8859-2 stream"
|
49 | 49 | ,code))
|
50 | - #x3F))))))) |
|
50 | + #x3F))))))
|
|
51 | + ()
|
|
52 | + ()
|
|
53 | + (octet-count (code state error present)
|
|
54 | + `(if (< ,code 160)
|
|
55 | + 1
|
|
56 | + (let ((,present (get-inverse ,itable ,code)))
|
|
57 | + (if ,present
|
|
58 | + 1
|
|
59 | + (if ,error
|
|
60 | + (locally
|
|
61 | + ;; No warnings about fdefinition
|
|
62 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
63 | + (funcall ,error "Cannot output codepoint #x~X to ISO8859-2 stream"
|
|
64 | + ,code))
|
|
65 | + 1)))))) |
File: tests/external-formats.lisp
... | ... | @@ -60,7 +60,53 @@ |
60 | 60 | (:tag :octet-count)
|
61 | 61 | (test-octet-count *test-unicode* :utf-32-le))
|
62 | 62 | |
63 | +(define-test octet-count.euc-kr
|
|
64 | + (:tag :octet-count)
|
|
65 | + (test-octet-count *test-unicode* :euc-kr))
|
|
66 | + |
|
67 | +(define-test octet-count.iso8859-2
|
|
68 | + (:tag :octet-count)
|
|
69 | + (test-octet-count *test-iso8859-1* :iso8859-2))
|
|
70 | + |
|
71 | +(define-test octet-count.iso8859-3
|
|
72 | + (:tag :octet-count)
|
|
73 | + (test-octet-count *test-iso8859-1* :iso8859-3))
|
|
74 | + |
|
75 | +(define-test octet-count.iso8859-4
|
|
76 | + (:tag :octet-count)
|
|
77 | + (test-octet-count *test-iso8859-1* :iso8859-4))
|
|
78 | + |
|
79 | +(define-test octet-count.iso8859-5
|
|
80 | + (:tag :octet-count)
|
|
81 | + (test-octet-count *test-iso8859-1* :iso8859-5))
|
|
82 | + |
|
83 | +(define-test octet-count.iso8859-6
|
|
84 | + (:tag :octet-count)
|
|
85 | + (test-octet-count *test-iso8859-1* :iso8859-6))
|
|
63 | 86 | |
87 | +(define-test octet-count.iso8859-7
|
|
88 | + (:tag :octet-count)
|
|
89 | + (test-octet-count *test-iso8859-1* :iso8859-7))
|
|
90 | + |
|
91 | +(define-test octet-count.iso8859-8
|
|
92 | + (:tag :octet-count)
|
|
93 | + (test-octet-count *test-iso8859-1* :iso8859-8))
|
|
94 | + |
|
95 | +(define-test octet-count.iso8859-10
|
|
96 | + (:tag :octet-count)
|
|
97 | + (test-octet-count *test-iso8859-1* :iso8859-10))
|
|
98 | + |
|
99 | +(define-test octet-count.iso8859-13
|
|
100 | + (:tag :octet-count)
|
|
101 | + (test-octet-count *test-iso8859-1* :iso8859-13))
|
|
102 | + |
|
103 | +(define-test octet-count.iso8859-14
|
|
104 | + (:tag :octet-count)
|
|
105 | + (test-octet-count *test-iso8859-1* :iso8859-14))
|
|
106 | + |
|
107 | +(define-test octet-count.iso8859-15
|
|
108 | + (:tag :octet-count)
|
|
109 | + (test-octet-count *test-iso8859-1* :iso8859-15))
|
|
64 | 110 | |
65 | 111 | |
66 | 112 |