| ... |
... |
@@ -467,18 +467,28 @@ is no fourth level. See COLLATION-WEIGHTS." |
|
467
|
467
|
(unless (zerop te) (push te l3))))
|
|
468
|
468
|
(values (nreverse l1) (nreverse l2) (nreverse l3) nil)))
|
|
469
|
469
|
|
|
470
|
|
-(defun collation-sort-key (d string &optional (variable-weighting :shifted))
|
|
|
470
|
+(defun collation-sort-key (d string &optional (variable-weighting :shifted)
|
|
|
471
|
+ (strength :tertiary))
|
|
471
|
472
|
"Compute the UTS #10 sort key for STRING under DUCET D. Returns a
|
|
472
|
|
-(simple-array (unsigned-byte 16) (*)) holding the level-1 weights, a 0000
|
|
473
|
|
-separator, the level-2 weights, 0000, the level-3 weights, and -- for the
|
|
474
|
|
-:SHIFTED option -- a further 0000 separator and the level-4 weights.
|
|
475
|
|
-Binary comparison of two such keys yields the collation order of their
|
|
476
|
|
-strings. VARIABLE-WEIGHTING is as in COLLATION-WEIGHTS."
|
|
|
473
|
+(simple-array (unsigned-byte 16) (*)) holding the weight levels separated
|
|
|
474
|
+by 0000: level 1, level 2, level 3, and -- under the :SHIFTED option --
|
|
|
475
|
+level 4. Binary comparison of two such keys yields the collation order
|
|
|
476
|
+of their strings. VARIABLE-WEIGHTING is as in COLLATION-WEIGHTS.
|
|
|
477
|
+
|
|
|
478
|
+STRENGTH bounds the levels included in the key, and hence the
|
|
|
479
|
+distinctions the comparison makes: :PRIMARY (base letters only),
|
|
|
480
|
+:SECONDARY (also accents), :TERTIARY (also case; the default), or
|
|
|
481
|
+:QUATERNARY (also the level-4 weights, which exist only under the
|
|
|
482
|
+:SHIFTED option and otherwise add nothing). A lower strength makes
|
|
|
483
|
+more strings compare equal; for example :SECONDARY ignores case."
|
|
477
|
484
|
(multiple-value-bind (l1 l2 l3 l4)
|
|
478
|
485
|
(collation-weights d string variable-weighting)
|
|
479
|
|
- (let* ((weights (if (eq variable-weighting :non-ignorable)
|
|
480
|
|
- (append l1 (list 0) l2 (list 0) l3)
|
|
481
|
|
- (append l1 (list 0) l2 (list 0) l3 (list 0) l4)))
|
|
|
486
|
+ (let* ((weights (ecase strength
|
|
|
487
|
+ (:primary l1)
|
|
|
488
|
+ (:secondary (append l1 (list 0) l2))
|
|
|
489
|
+ (:tertiary (append l1 (list 0) l2 (list 0) l3))
|
|
|
490
|
+ (:quaternary
|
|
|
491
|
+ (append l1 (list 0) l2 (list 0) l3 (list 0) l4))))
|
|
482
|
492
|
(key (make-array (length weights)
|
|
483
|
493
|
:element-type '(unsigned-byte 16))))
|
|
484
|
494
|
(loop for w in weights
|
| ... |
... |
@@ -486,12 +496,13 @@ strings. VARIABLE-WEIGHTING is as in COLLATION-WEIGHTS." |
|
486
|
496
|
do (setf (aref key k) w))
|
|
487
|
497
|
key)))
|
|
488
|
498
|
|
|
489
|
|
-(defun collation-compare (d s1 s2 &optional (variable-weighting :shifted))
|
|
|
499
|
+(defun collation-compare (d s1 s2 &optional (variable-weighting :shifted)
|
|
|
500
|
+ (strength :tertiary))
|
|
490
|
501
|
"Compare strings S1 and S2 under DUCET D. Returns -1, 0, or 1 like a
|
|
491
|
502
|
three-way comparison: negative if S1 sorts before S2, zero if equal, 1
|
|
492
|
|
-if after. VARIABLE-WEIGHTING is as in COLLATION-WEIGHTS."
|
|
493
|
|
- (let ((k1 (collation-sort-key d s1 variable-weighting))
|
|
494
|
|
- (k2 (collation-sort-key d s2 variable-weighting)))
|
|
|
503
|
+if after. VARIABLE-WEIGHTING and STRENGTH are as in COLLATION-SORT-KEY."
|
|
|
504
|
+ (let ((k1 (collation-sort-key d s1 variable-weighting strength))
|
|
|
505
|
+ (k2 (collation-sort-key d s2 variable-weighting strength)))
|
|
495
|
506
|
(let ((n (min (length k1) (length k2))))
|
|
496
|
507
|
(dotimes (i n)
|
|
497
|
508
|
(let ((a (aref k1 i)) (b (aref k2 i)))
|
| ... |
... |
@@ -615,7 +626,7 @@ collation section of unidata.bin on first use." |
|
615
|
626
|
(setf *collation-table* (lisp::unidata-ducet))))
|
|
616
|
627
|
|
|
617
|
628
|
(defun %collation-compare (string1 string2 start1 end1 start2 end2
|
|
618
|
|
- variable-weighting)
|
|
|
629
|
+ variable-weighting strength)
|
|
619
|
630
|
"Three-way collation comparison of the designated substrings of
|
|
620
|
631
|
STRING1 and STRING2: returns a negative integer, zero, or a positive
|
|
621
|
632
|
integer as the first sorts before, equal to, or after the second."
|
| ... |
... |
@@ -625,47 +636,87 @@ integer as the first sorts before, equal to, or after the second." |
|
625
|
636
|
(setf s1 (subseq s1 start1 end1)))
|
|
626
|
637
|
(when (or (/= start2 0) end2)
|
|
627
|
638
|
(setf s2 (subseq s2 start2 end2)))
|
|
628
|
|
- (lisp::collation-compare (collation-table) s1 s2 variable-weighting)))
|
|
|
639
|
+ (lisp::collation-compare (collation-table) s1 s2
|
|
|
640
|
+ variable-weighting strength)))
|
|
629
|
641
|
|
|
630
|
|
-(defmacro %def-collation-predicate (name test docstring)
|
|
|
642
|
+(defmacro %def-collation-predicate (name test default-strength docstring)
|
|
631
|
643
|
"Define a collation comparison predicate NAME whose result is (TEST c)
|
|
632
|
|
-where c is the three-way comparison of the two string arguments."
|
|
|
644
|
+where c is the three-way comparison of the two string arguments.
|
|
|
645
|
+DEFAULT-STRENGTH is the default value of the STRENGTH keyword."
|
|
633
|
646
|
`(defun ,name (string1 string2 &key (start1 0) end1 (start2 0) end2
|
|
634
|
|
- (variable-weighting :shifted))
|
|
|
647
|
+ (variable-weighting :shifted)
|
|
|
648
|
+ (strength ,default-strength))
|
|
635
|
649
|
,docstring
|
|
636
|
650
|
(let ((c (%collation-compare string1 string2
|
|
637
|
651
|
start1 end1 start2 end2
|
|
638
|
|
- variable-weighting)))
|
|
|
652
|
+ variable-weighting strength)))
|
|
639
|
653
|
(,test c))))
|
|
640
|
654
|
|
|
641
|
|
-(%def-collation-predicate string= zerop
|
|
|
655
|
+(%def-collation-predicate string= zerop :tertiary
|
|
642
|
656
|
"Return true if STRING1 and STRING2 collate as equal under the Unicode
|
|
643
|
657
|
Collation Algorithm. Note that this is collation equality, not
|
|
644
|
658
|
code-point identity: canonically equivalent strings, and strings that
|
|
645
|
659
|
differ only in collation-ignorable ways, compare equal. START1, END1,
|
|
646
|
|
-START2 and END2 bound the substrings compared; VARIABLE-WEIGHTING is
|
|
647
|
|
-:SHIFTED (the default) or :NON-IGNORABLE.")
|
|
|
660
|
+START2 and END2 bound the substrings compared. VARIABLE-WEIGHTING is
|
|
|
661
|
+:SHIFTED (the default) or :NON-IGNORABLE. STRENGTH is :PRIMARY,
|
|
|
662
|
+:SECONDARY, :TERTIARY (the default), or :QUATERNARY, as in
|
|
|
663
|
+LISP::COLLATION-SORT-KEY; a lower strength makes more strings compare
|
|
|
664
|
+equal -- :SECONDARY, for instance, ignores case.")
|
|
648
|
665
|
|
|
649
|
|
-(%def-collation-predicate string/= (lambda (c) (not (zerop c)))
|
|
|
666
|
+(%def-collation-predicate string/= (lambda (c) (not (zerop c))) :tertiary
|
|
650
|
667
|
"Return true if STRING1 and STRING2 do not collate as equal. See
|
|
651
|
668
|
UNICODE:STRING= for the meaning of the keyword arguments.")
|
|
652
|
669
|
|
|
653
|
|
-(%def-collation-predicate string< minusp
|
|
|
670
|
+(%def-collation-predicate string< minusp :tertiary
|
|
654
|
671
|
"Return true if STRING1 collates before STRING2 under the Unicode
|
|
655
|
672
|
Collation Algorithm. See UNICODE:STRING= for the meaning of the keyword
|
|
656
|
673
|
arguments.")
|
|
657
|
674
|
|
|
658
|
|
-(%def-collation-predicate string> plusp
|
|
|
675
|
+(%def-collation-predicate string> plusp :tertiary
|
|
659
|
676
|
"Return true if STRING1 collates after STRING2 under the Unicode
|
|
660
|
677
|
Collation Algorithm. See UNICODE:STRING= for the meaning of the keyword
|
|
661
|
678
|
arguments.")
|
|
662
|
679
|
|
|
663
|
|
-(%def-collation-predicate string<= (lambda (c) (not (plusp c)))
|
|
|
680
|
+(%def-collation-predicate string<= (lambda (c) (not (plusp c))) :tertiary
|
|
664
|
681
|
"Return true if STRING1 collates before or equal to STRING2 under the
|
|
665
|
682
|
Unicode Collation Algorithm. See UNICODE:STRING= for the meaning of the
|
|
666
|
683
|
keyword arguments.")
|
|
667
|
684
|
|
|
668
|
|
-(%def-collation-predicate string>= (lambda (c) (not (minusp c)))
|
|
|
685
|
+(%def-collation-predicate string>= (lambda (c) (not (minusp c))) :tertiary
|
|
669
|
686
|
"Return true if STRING1 collates after or equal to STRING2 under the
|
|
670
|
687
|
Unicode Collation Algorithm. See UNICODE:STRING= for the meaning of the
|
|
671
|
688
|
keyword arguments.")
|
|
|
689
|
+
|
|
|
690
|
+;;; The case-insensitive comparison functions, the Unicode analogs of
|
|
|
691
|
+;;; the COMMON-LISP -EQUAL/-LESSP/... family. They default to :SECONDARY
|
|
|
692
|
+;;; strength, which drops the tertiary level where case is encoded, so
|
|
|
693
|
+;;; they ignore case (and other tertiary distinctions, such as width)
|
|
|
694
|
+;;; while remaining sensitive to base letters and accents. This is the
|
|
|
695
|
+;;; closest collation analog of case-folded comparison; the Unicode
|
|
|
696
|
+;;; Collation Algorithm has no operation that folds case alone.
|
|
|
697
|
+
|
|
|
698
|
+(%def-collation-predicate string-equal zerop :secondary
|
|
|
699
|
+ "Return true if STRING1 and STRING2 collate as equal ignoring case,
|
|
|
700
|
+under the Unicode Collation Algorithm. Like UNICODE:STRING= but
|
|
|
701
|
+defaulting to :SECONDARY strength; see it for the keyword arguments.")
|
|
|
702
|
+
|
|
|
703
|
+(%def-collation-predicate string-not-equal (lambda (c) (not (zerop c))) :secondary
|
|
|
704
|
+ "Return true if STRING1 and STRING2 do not collate as equal ignoring
|
|
|
705
|
+case. See UNICODE:STRING-EQUAL for the keyword arguments.")
|
|
|
706
|
+
|
|
|
707
|
+(%def-collation-predicate string-lessp minusp :secondary
|
|
|
708
|
+ "Return true if STRING1 collates before STRING2 ignoring case, under
|
|
|
709
|
+the Unicode Collation Algorithm. See UNICODE:STRING-EQUAL for the
|
|
|
710
|
+keyword arguments.")
|
|
|
711
|
+
|
|
|
712
|
+(%def-collation-predicate string-greaterp plusp :secondary
|
|
|
713
|
+ "Return true if STRING1 collates after STRING2 ignoring case. See
|
|
|
714
|
+UNICODE:STRING-EQUAL for the keyword arguments.")
|
|
|
715
|
+
|
|
|
716
|
+(%def-collation-predicate string-not-greaterp (lambda (c) (not (plusp c))) :secondary
|
|
|
717
|
+ "Return true if STRING1 collates before or equal to STRING2 ignoring
|
|
|
718
|
+case. See UNICODE:STRING-EQUAL for the keyword arguments.")
|
|
|
719
|
+
|
|
|
720
|
+(%def-collation-predicate string-not-lessp (lambda (c) (not (minusp c))) :secondary
|
|
|
721
|
+ "Return true if STRING1 collates after or equal to STRING2 ignoring
|
|
|
722
|
+case. See UNICODE:STRING-EQUAL for the keyword arguments.") |