| ... |
... |
@@ -89,26 +89,40 @@ the BMP as UTF-16 surrogate pairs." |
|
89
|
89
|
(vector-push-extend (code-char cp) out)))
|
|
90
|
90
|
(coerce out 'simple-string)))
|
|
91
|
91
|
|
|
|
92
|
+(defun run-collation-conformance (ducet file weighting)
|
|
|
93
|
+ "Check every line of the UCA conformance FILE: the four sort-key levels
|
|
|
94
|
+produced by LISP::COLLATION-WEIGHTS under WEIGHTING must match the
|
|
|
95
|
+expected key parsed from the line's comment. Each line is a separate
|
|
|
96
|
+LISP-UNIT assertion.
|
|
|
97
|
+
|
|
|
98
|
+This is a plain function rather than inline in the DEFINE-TESTs below
|
|
|
99
|
+because a DEFINE-TEST body is stored as source and run interpreted; the
|
|
|
100
|
+per-line work over a quarter-million lines must run compiled, so it
|
|
|
101
|
+lives here and the tests just call it."
|
|
|
102
|
+ (with-open-file (s file :direction :input :external-format :utf-8)
|
|
|
103
|
+ (loop for line = (read-line s nil nil)
|
|
|
104
|
+ while line
|
|
|
105
|
+ do
|
|
|
106
|
+ (multiple-value-bind (cps e1 e2 e3 e4)
|
|
|
107
|
+ (collation-parse-test-line line)
|
|
|
108
|
+ (when cps
|
|
|
109
|
+ (multiple-value-bind (g1 g2 g3 g4)
|
|
|
110
|
+ (lisp::collation-weights ducet (collation-test-string cps)
|
|
|
111
|
+ weighting)
|
|
|
112
|
+ ;; For :NON-IGNORABLE the comment has no fourth level
|
|
|
113
|
+ ;; and COLLATION-WEIGHTS returns NIL for L4, so the
|
|
|
114
|
+ ;; same four-level comparison serves both options.
|
|
|
115
|
+ (assert-equalp (list e1 e2 e3 e4)
|
|
|
116
|
+ (list g1 g2 g3 g4)
|
|
|
117
|
+ cps)))))))
|
|
|
118
|
+
|
|
92
|
119
|
(define-test unicode.collation-shifted
|
|
93
|
120
|
"Test UTS #10 collation sort keys against the UCA SHIFTED conformance
|
|
94
|
121
|
data. For each line, the four sort-key levels produced by
|
|
95
|
122
|
LISP::COLLATION-WEIGHTS must match the expected key in the line's
|
|
96
|
123
|
comment."
|
|
97
|
124
|
(:tag :unicode)
|
|
98
|
|
- (let ((ducet (ducet)))
|
|
99
|
|
- (with-open-file (s *collation-shifted-test* :direction :input
|
|
100
|
|
- :external-format :utf-8)
|
|
101
|
|
- (loop for line = (read-line s nil nil)
|
|
102
|
|
- while line
|
|
103
|
|
- do
|
|
104
|
|
- (multiple-value-bind (cps e1 e2 e3 e4)
|
|
105
|
|
- (collation-parse-test-line line)
|
|
106
|
|
- (when cps
|
|
107
|
|
- (multiple-value-bind (g1 g2 g3 g4)
|
|
108
|
|
- (lisp::collation-weights ducet (collation-test-string cps))
|
|
109
|
|
- (assert-equalp (list e1 e2 e3 e4)
|
|
110
|
|
- (list g1 g2 g3 g4)
|
|
111
|
|
- cps))))))))
|
|
|
125
|
+ (run-collation-conformance (ducet) *collation-shifted-test* :shifted))
|
|
112
|
126
|
|
|
113
|
127
|
(define-test unicode.collation-non-ignorable
|
|
114
|
128
|
"Test UTS #10 collation sort keys against the UCA NON_IGNORABLE
|
| ... |
... |
@@ -117,18 +131,5 @@ their weights and there is no fourth level, so for each line the three |
|
117
|
131
|
weight levels produced by LISP::COLLATION-WEIGHTS with :NON-IGNORABLE
|
|
118
|
132
|
must match the expected key in the line's comment."
|
|
119
|
133
|
(:tag :unicode)
|
|
120
|
|
- (let ((ducet (ducet)))
|
|
121
|
|
- (with-open-file (s *collation-non-ignorable-test* :direction :input
|
|
122
|
|
- :external-format :utf-8)
|
|
123
|
|
- (loop for line = (read-line s nil nil)
|
|
124
|
|
- while line
|
|
125
|
|
- do
|
|
126
|
|
- (multiple-value-bind (cps e1 e2 e3)
|
|
127
|
|
- (collation-parse-test-line line)
|
|
128
|
|
- (when cps
|
|
129
|
|
- (multiple-value-bind (g1 g2 g3)
|
|
130
|
|
- (lisp::collation-weights ducet (collation-test-string cps)
|
|
131
|
|
- :non-ignorable)
|
|
132
|
|
- (assert-equalp (list e1 e2 e3)
|
|
133
|
|
- (list g1 g2 g3)
|
|
134
|
|
- cps)))))))) |
|
|
134
|
+ (run-collation-conformance (ducet) *collation-non-ignorable-test*
|
|
|
135
|
+ :non-ignorable)) |