Raymond Toy pushed to branch rtoy-unicode-collation-ducet at cmucl / cmucl

Commits:

1 changed file:

Changes:

  • tests/unicode-collation.lisp
    ... ... @@ -89,26 +89,40 @@ the BMP as UTF-16 surrogate pairs."
    89 89
               (vector-push-extend (code-char cp) out)))
    
    90 90
         (coerce out 'simple-string)))
    
    91 91
     
    
    92
    +(defun run-collation-conformance (ducet file weighting)
    
    93
    +  "Check every line of the UCA conformance FILE: the four sort-key levels
    
    94
    +produced by LISP::COLLATION-WEIGHTS under WEIGHTING must match the
    
    95
    +expected key parsed from the line's comment.  Each line is a separate
    
    96
    +LISP-UNIT assertion.
    
    97
    +
    
    98
    +This is a plain function rather than inline in the DEFINE-TESTs below
    
    99
    +because a DEFINE-TEST body is stored as source and run interpreted; the
    
    100
    +per-line work over a quarter-million lines must run compiled, so it
    
    101
    +lives here and the tests just call it."
    
    102
    +  (with-open-file (s file :direction :input :external-format :utf-8)
    
    103
    +    (loop for line = (read-line s nil nil)
    
    104
    +          while line
    
    105
    +          do
    
    106
    +             (multiple-value-bind (cps e1 e2 e3 e4)
    
    107
    +                 (collation-parse-test-line line)
    
    108
    +               (when cps
    
    109
    +                 (multiple-value-bind (g1 g2 g3 g4)
    
    110
    +                     (lisp::collation-weights ducet (collation-test-string cps)
    
    111
    +                                              weighting)
    
    112
    +                   ;; For :NON-IGNORABLE the comment has no fourth level
    
    113
    +                   ;; and COLLATION-WEIGHTS returns NIL for L4, so the
    
    114
    +                   ;; same four-level comparison serves both options.
    
    115
    +                   (assert-equalp (list e1 e2 e3 e4)
    
    116
    +                                  (list g1 g2 g3 g4)
    
    117
    +                                  cps)))))))
    
    118
    +
    
    92 119
     (define-test unicode.collation-shifted
    
    93 120
       "Test UTS #10 collation sort keys against the UCA SHIFTED conformance
    
    94 121
     data.  For each line, the four sort-key levels produced by
    
    95 122
     LISP::COLLATION-WEIGHTS must match the expected key in the line's
    
    96 123
     comment."
    
    97 124
       (:tag :unicode)
    
    98
    -  (let ((ducet (ducet)))
    
    99
    -    (with-open-file (s *collation-shifted-test* :direction :input
    
    100
    -                       :external-format :utf-8)
    
    101
    -      (loop for line = (read-line s nil nil)
    
    102
    -            while line
    
    103
    -            do
    
    104
    -               (multiple-value-bind (cps e1 e2 e3 e4)
    
    105
    -                   (collation-parse-test-line line)
    
    106
    -                 (when cps
    
    107
    -                   (multiple-value-bind (g1 g2 g3 g4)
    
    108
    -                       (lisp::collation-weights ducet (collation-test-string cps))
    
    109
    -                     (assert-equalp (list e1 e2 e3 e4)
    
    110
    -                                    (list g1 g2 g3 g4)
    
    111
    -                                    cps))))))))
    
    125
    +  (run-collation-conformance (ducet) *collation-shifted-test* :shifted))
    
    112 126
     
    
    113 127
     (define-test unicode.collation-non-ignorable
    
    114 128
       "Test UTS #10 collation sort keys against the UCA NON_IGNORABLE
    
    ... ... @@ -117,18 +131,5 @@ their weights and there is no fourth level, so for each line the three
    117 131
     weight levels produced by LISP::COLLATION-WEIGHTS with :NON-IGNORABLE
    
    118 132
     must match the expected key in the line's comment."
    
    119 133
       (:tag :unicode)
    
    120
    -  (let ((ducet (ducet)))
    
    121
    -    (with-open-file (s *collation-non-ignorable-test* :direction :input
    
    122
    -                       :external-format :utf-8)
    
    123
    -      (loop for line = (read-line s nil nil)
    
    124
    -            while line
    
    125
    -            do
    
    126
    -               (multiple-value-bind (cps e1 e2 e3)
    
    127
    -                   (collation-parse-test-line line)
    
    128
    -                 (when cps
    
    129
    -                   (multiple-value-bind (g1 g2 g3)
    
    130
    -                       (lisp::collation-weights ducet (collation-test-string cps)
    
    131
    -                                                :non-ignorable)
    
    132
    -                     (assert-equalp (list e1 e2 e3)
    
    133
    -                                    (list g1 g2 g3)
    
    134
    -                                    cps))))))))
    134
    +  (run-collation-conformance (ducet) *collation-non-ignorable-test*
    
    135
    +                             :non-ignorable))