Raymond Toy pushed to branch issue-367-count-octets-for-encoding at cmucl / cmucl
Commits:
- fe3f7011 by Raymond Toy at 2025-01-13T08:53:49-08:00
- f0aea1e8 by Raymond Toy at 2025-01-13T08:54:08-08:00
- 260c3806 by Raymond Toy at 2025-01-13T12:09:11-08:00
- f5e4262b by Raymond Toy at 2025-01-13T12:09:41-08:00
3 changed files:
- src/pcl/simple-streams/external-formats/euc-kr.lisp
- src/pcl/simple-streams/external-formats/iso8859-2.lisp
- tests/external-formats.lisp
Changes:
| ... | ... | @@ -1007,4 +1007,16 @@ character and illegal outputs are replaced by a question mark.") |
| 1007 | 1007 | (t
|
| 1008 | 1008 | (if ,error
|
| 1009 | 1009 | (funcall ,error "Cannot output codepoint #x~X to EUC-KR format." ,code)
|
| 1010 | - (,output #X3f)))))))) |
|
| 1010 | + (,output #X3f)))))))
|
|
| 1011 | + ()
|
|
| 1012 | + ()
|
|
| 1013 | + (octet-count (code state error present)
|
|
| 1014 | + `(if (<= ,code #x7f)
|
|
| 1015 | + 1
|
|
| 1016 | + (let ((,present (get-inverse ,itable ,code)))
|
|
| 1017 | + (cond (,present
|
|
| 1018 | + 2)
|
|
| 1019 | + (t
|
|
| 1020 | + (if ,error
|
|
| 1021 | + (funcall ,error "Cannot output codepoint #x~X to EUC-KR format." ,code)
|
|
| 1022 | + 1))))))) |
| ... | ... | @@ -47,4 +47,19 @@ character and illegal outputs are replaced by a question mark.") |
| 47 | 47 | (declare (optimize (ext:inhibit-warnings 3)))
|
| 48 | 48 | (funcall ,error "Cannot output codepoint #x~X to ISO8859-2 stream"
|
| 49 | 49 | ,code))
|
| 50 | - #x3F))))))) |
|
| 50 | + #x3F))))))
|
|
| 51 | + ()
|
|
| 52 | + ()
|
|
| 53 | + (octet-count (code state error present)
|
|
| 54 | + `(if (< ,code 160)
|
|
| 55 | + 1
|
|
| 56 | + (let ((,present (get-inverse ,itable ,code)))
|
|
| 57 | + (if ,present
|
|
| 58 | + 1
|
|
| 59 | + (if ,error
|
|
| 60 | + (locally
|
|
| 61 | + ;; No warnings about fdefinition
|
|
| 62 | + (declare (optimize (ext:inhibit-warnings 3)))
|
|
| 63 | + (funcall ,error "Cannot output codepoint #x~X to ISO8859-2 stream"
|
|
| 64 | + ,code))
|
|
| 65 | + 1)))))) |
| ... | ... | @@ -60,7 +60,53 @@ |
| 60 | 60 | (:tag :octet-count)
|
| 61 | 61 | (test-octet-count *test-unicode* :utf-32-le))
|
| 62 | 62 | |
| 63 | +(define-test octet-count.euc-kr
|
|
| 64 | + (:tag :octet-count)
|
|
| 65 | + (test-octet-count *test-unicode* :euc-kr))
|
|
| 66 | + |
|
| 67 | +(define-test octet-count.iso8859-2
|
|
| 68 | + (:tag :octet-count)
|
|
| 69 | + (test-octet-count *test-iso8859-1* :iso8859-2))
|
|
| 70 | + |
|
| 71 | +(define-test octet-count.iso8859-3
|
|
| 72 | + (:tag :octet-count)
|
|
| 73 | + (test-octet-count *test-iso8859-1* :iso8859-3))
|
|
| 74 | + |
|
| 75 | +(define-test octet-count.iso8859-4
|
|
| 76 | + (:tag :octet-count)
|
|
| 77 | + (test-octet-count *test-iso8859-1* :iso8859-4))
|
|
| 78 | + |
|
| 79 | +(define-test octet-count.iso8859-5
|
|
| 80 | + (:tag :octet-count)
|
|
| 81 | + (test-octet-count *test-iso8859-1* :iso8859-5))
|
|
| 82 | + |
|
| 83 | +(define-test octet-count.iso8859-6
|
|
| 84 | + (:tag :octet-count)
|
|
| 85 | + (test-octet-count *test-iso8859-1* :iso8859-6))
|
|
| 63 | 86 | |
| 87 | +(define-test octet-count.iso8859-7
|
|
| 88 | + (:tag :octet-count)
|
|
| 89 | + (test-octet-count *test-iso8859-1* :iso8859-7))
|
|
| 90 | + |
|
| 91 | +(define-test octet-count.iso8859-8
|
|
| 92 | + (:tag :octet-count)
|
|
| 93 | + (test-octet-count *test-iso8859-1* :iso8859-8))
|
|
| 94 | + |
|
| 95 | +(define-test octet-count.iso8859-10
|
|
| 96 | + (:tag :octet-count)
|
|
| 97 | + (test-octet-count *test-iso8859-1* :iso8859-10))
|
|
| 98 | + |
|
| 99 | +(define-test octet-count.iso8859-13
|
|
| 100 | + (:tag :octet-count)
|
|
| 101 | + (test-octet-count *test-iso8859-1* :iso8859-13))
|
|
| 102 | + |
|
| 103 | +(define-test octet-count.iso8859-14
|
|
| 104 | + (:tag :octet-count)
|
|
| 105 | + (test-octet-count *test-iso8859-1* :iso8859-14))
|
|
| 106 | + |
|
| 107 | +(define-test octet-count.iso8859-15
|
|
| 108 | + (:tag :octet-count)
|
|
| 109 | + (test-octet-count *test-iso8859-1* :iso8859-15))
|
|
| 64 | 110 | |
| 65 | 111 | |
| 66 | 112 |