| ... |
... |
@@ -500,3 +500,93 @@ if after. VARIABLE-WEIGHTING is as in COLLATION-WEIGHTS." |
|
500
|
500
|
(cond ((< (length k1) (length k2)) -1)
|
|
501
|
501
|
((> (length k1) (length k2)) 1)
|
|
502
|
502
|
(t 0)))))
|
|
|
503
|
+
|
|
|
504
|
+
|
|
|
505
|
+;;; -------------------------------------------------------------------
|
|
|
506
|
+;;; Public collation API (UNICODE package): the Unicode-aware
|
|
|
507
|
+;;; equivalents of the COMMON-LISP string comparison functions.
|
|
|
508
|
+;;;
|
|
|
509
|
+;;; These compare strings by the Unicode Collation Algorithm rather than
|
|
|
510
|
+;;; by code-point order, so the result reflects linguistic sort order
|
|
|
511
|
+;;; (after NFD normalization, with contractions, expansions and the
|
|
|
512
|
+;;; chosen variable-weighting option). The Default Unicode Collation
|
|
|
513
|
+;;; Element Table is loaded lazily on first use.
|
|
|
514
|
+;;;
|
|
|
515
|
+;;; Unlike the COMMON-LISP functions, these return a generalized boolean
|
|
|
516
|
+;;; (T or NIL) rather than a mismatch index: the comparison is performed
|
|
|
517
|
+;;; on sort keys derived from the whole normalized string, so there is
|
|
|
518
|
+;;; no meaningful character index of the first difference to return.
|
|
|
519
|
+;;; -------------------------------------------------------------------
|
|
|
520
|
+
|
|
|
521
|
+(in-package "UNICODE")
|
|
|
522
|
+
|
|
|
523
|
+(defvar *collation-table-path* "ext-formats:allkeys.txt"
|
|
|
524
|
+ "Pathname of the DUCET data file (allkeys.txt) from which the default
|
|
|
525
|
+collation table is loaded on first use.")
|
|
|
526
|
+
|
|
|
527
|
+(defvar *collation-table* nil
|
|
|
528
|
+ "The default Unicode collation table, or NIL if it has not yet been
|
|
|
529
|
+loaded. Loaded lazily from *COLLATION-TABLE-PATH* the first time a
|
|
|
530
|
+collation function needs it. Set to NIL to force a reload.")
|
|
|
531
|
+
|
|
|
532
|
+(defun collation-table ()
|
|
|
533
|
+ "Return the default Unicode collation table, loading it from
|
|
|
534
|
+*COLLATION-TABLE-PATH* on first use."
|
|
|
535
|
+ (or *collation-table*
|
|
|
536
|
+ (setf *collation-table* (lisp::load-ducet *collation-table-path*))))
|
|
|
537
|
+
|
|
|
538
|
+(defun %collation-compare (string1 string2 start1 end1 start2 end2
|
|
|
539
|
+ variable-weighting)
|
|
|
540
|
+ "Three-way collation comparison of the designated substrings of
|
|
|
541
|
+STRING1 and STRING2: returns a negative integer, zero, or a positive
|
|
|
542
|
+integer as the first sorts before, equal to, or after the second."
|
|
|
543
|
+ (let ((s1 (string string1))
|
|
|
544
|
+ (s2 (string string2)))
|
|
|
545
|
+ (when (or (/= start1 0) end1)
|
|
|
546
|
+ (setf s1 (subseq s1 start1 end1)))
|
|
|
547
|
+ (when (or (/= start2 0) end2)
|
|
|
548
|
+ (setf s2 (subseq s2 start2 end2)))
|
|
|
549
|
+ (lisp::collation-compare (collation-table) s1 s2 variable-weighting)))
|
|
|
550
|
+
|
|
|
551
|
+(defmacro %def-collation-predicate (name test docstring)
|
|
|
552
|
+ "Define a collation comparison predicate NAME whose result is (TEST c)
|
|
|
553
|
+where c is the three-way comparison of the two string arguments."
|
|
|
554
|
+ `(defun ,name (string1 string2 &key (start1 0) end1 (start2 0) end2
|
|
|
555
|
+ (variable-weighting :shifted))
|
|
|
556
|
+ ,docstring
|
|
|
557
|
+ (let ((c (%collation-compare string1 string2
|
|
|
558
|
+ start1 end1 start2 end2
|
|
|
559
|
+ variable-weighting)))
|
|
|
560
|
+ (,test c))))
|
|
|
561
|
+
|
|
|
562
|
+(%def-collation-predicate string= zerop
|
|
|
563
|
+ "Return true if STRING1 and STRING2 collate as equal under the Unicode
|
|
|
564
|
+Collation Algorithm. Note that this is collation equality, not
|
|
|
565
|
+code-point identity: canonically equivalent strings, and strings that
|
|
|
566
|
+differ only in collation-ignorable ways, compare equal. START1, END1,
|
|
|
567
|
+START2 and END2 bound the substrings compared; VARIABLE-WEIGHTING is
|
|
|
568
|
+:SHIFTED (the default) or :NON-IGNORABLE.")
|
|
|
569
|
+
|
|
|
570
|
+(%def-collation-predicate string/= (lambda (c) (not (zerop c)))
|
|
|
571
|
+ "Return true if STRING1 and STRING2 do not collate as equal. See
|
|
|
572
|
+UNICODE:STRING= for the meaning of the keyword arguments.")
|
|
|
573
|
+
|
|
|
574
|
+(%def-collation-predicate string< minusp
|
|
|
575
|
+ "Return true if STRING1 collates before STRING2 under the Unicode
|
|
|
576
|
+Collation Algorithm. See UNICODE:STRING= for the meaning of the keyword
|
|
|
577
|
+arguments.")
|
|
|
578
|
+
|
|
|
579
|
+(%def-collation-predicate string> plusp
|
|
|
580
|
+ "Return true if STRING1 collates after STRING2 under the Unicode
|
|
|
581
|
+Collation Algorithm. See UNICODE:STRING= for the meaning of the keyword
|
|
|
582
|
+arguments.")
|
|
|
583
|
+
|
|
|
584
|
+(%def-collation-predicate string<= (lambda (c) (not (plusp c)))
|
|
|
585
|
+ "Return true if STRING1 collates before or equal to STRING2 under the
|
|
|
586
|
+Unicode Collation Algorithm. See UNICODE:STRING= for the meaning of the
|
|
|
587
|
+keyword arguments.")
|
|
|
588
|
+
|
|
|
589
|
+(%def-collation-predicate string>= (lambda (c) (not (minusp c)))
|
|
|
590
|
+ "Return true if STRING1 collates after or equal to STRING2 under the
|
|
|
591
|
+Unicode Collation Algorithm. See UNICODE:STRING= for the meaning of the
|
|
|
592
|
+keyword arguments.") |