Raymond Toy pushed to branch issue-240-clean-up-hashtable-impl at cmucl / cmucl

Commits:

4 changed files:

Changes:

  • src/code/list.lisp
    ... ... @@ -749,6 +749,7 @@
    749 749
     (defparameter *min-list-length-for-hashtable*
    
    750 750
       15)
    
    751 751
     
    
    752
    +(declaim (start-block list-to-hashtable union intersection set-difference))
    
    752 753
     ;; Convert a list to a hashtable.  The hashtable does not handle
    
    753 754
     ;; duplicated values in the list.  Returns the hashtable.
    
    754 755
     (defun list-to-hashtable (list key test test-not)
    
    ... ... @@ -803,29 +804,6 @@
    803 804
                  (process (with-set-keys (member (apply-key key item) list2)))))
    
    804 805
           res)))
    
    805 806
     
    
    806
    -;;; Destination and source are setf-able and many-evaluable.  Sets the source
    
    807
    -;;; to the cdr, and "conses" the 1st elt of source to destination.
    
    808
    -;;;
    
    809
    -(defmacro steve-splice (source destination)
    
    810
    -  `(let ((temp ,source))
    
    811
    -     (setf ,source (cdr ,source)
    
    812
    -	   (cdr temp) ,destination
    
    813
    -	   ,destination temp)))
    
    814
    -
    
    815
    -(defun nunion (list1 list2 &key key (test #'eql testp) (test-not nil notp))
    
    816
    -  "Destructively returns the union list1 and list2."
    
    817
    -  (declare (inline member))
    
    818
    -  (if (and testp notp)
    
    819
    -      (error "Test and test-not both supplied."))
    
    820
    -  (let ((res list2)
    
    821
    -	(list1 list1))
    
    822
    -    (do ()
    
    823
    -	((endp list1))
    
    824
    -      (if (not (with-set-keys (member (apply-key key (car list1)) list2)))
    
    825
    -	  (steve-splice list1 res)
    
    826
    -	  (setf list1 (cdr list1))))
    
    827
    -    res))
    
    828
    -  
    
    829 807
     
    
    830 808
     (defun intersection (list1 list2 &key key
    
    831 809
     			   (test #'eql testp) (test-not nil notp))
    
    ... ... @@ -847,20 +825,6 @@
    847 825
                 (t
    
    848 826
                  (process (with-set-keys (member (apply-key key item) list2))))))))
    
    849 827
     
    
    850
    -(defun nintersection (list1 list2 &key key
    
    851
    -			    (test #'eql testp) (test-not nil notp))
    
    852
    -  "Destructively returns the intersection of list1 and list2."
    
    853
    -  (declare (inline member))
    
    854
    -  (if (and testp notp)
    
    855
    -      (error "Test and test-not both supplied."))
    
    856
    -  (let ((res nil)
    
    857
    -	(list1 list1))
    
    858
    -    (do () ((endp list1))
    
    859
    -      (if (with-set-keys (member (apply-key key (car list1)) list2))
    
    860
    -	  (steve-splice list1 res)
    
    861
    -	  (setq list1 (Cdr list1))))
    
    862
    -    res))
    
    863
    -
    
    864 828
     (defun set-difference (list1 list2 &key key (test #'eql testp) (test-not nil notp))
    
    865 829
       "Returns the elements of list1 which are not in list2."
    
    866 830
       (declare (inline member))
    
    ... ... @@ -888,6 +852,45 @@
    888 852
                  (process (with-set-keys (member (apply-key key item) list2))))))))
    
    889 853
     
    
    890 854
     
    
    855
    +(declaim (end-block))
    
    856
    +
    
    857
    +;;; Destination and source are setf-able and many-evaluable.  Sets the source
    
    858
    +;;; to the cdr, and "conses" the 1st elt of source to destination.
    
    859
    +;;;
    
    860
    +(defmacro steve-splice (source destination)
    
    861
    +  `(let ((temp ,source))
    
    862
    +     (setf ,source (cdr ,source)
    
    863
    +	   (cdr temp) ,destination
    
    864
    +	   ,destination temp)))
    
    865
    +
    
    866
    +(defun nunion (list1 list2 &key key (test #'eql testp) (test-not nil notp))
    
    867
    +  "Destructively returns the union list1 and list2."
    
    868
    +  (declare (inline member))
    
    869
    +  (if (and testp notp)
    
    870
    +      (error "Test and test-not both supplied."))
    
    871
    +  (let ((res list2)
    
    872
    +	(list1 list1))
    
    873
    +    (do ()
    
    874
    +	((endp list1))
    
    875
    +      (if (not (with-set-keys (member (apply-key key (car list1)) list2)))
    
    876
    +	  (steve-splice list1 res)
    
    877
    +	  (setf list1 (cdr list1))))
    
    878
    +    res))
    
    879
    +  
    
    880
    +(defun nintersection (list1 list2 &key key
    
    881
    +			    (test #'eql testp) (test-not nil notp))
    
    882
    +  "Destructively returns the intersection of list1 and list2."
    
    883
    +  (declare (inline member))
    
    884
    +  (if (and testp notp)
    
    885
    +      (error "Test and test-not both supplied."))
    
    886
    +  (let ((res nil)
    
    887
    +	(list1 list1))
    
    888
    +    (do () ((endp list1))
    
    889
    +      (if (with-set-keys (member (apply-key key (car list1)) list2))
    
    890
    +	  (steve-splice list1 res)
    
    891
    +	  (setq list1 (Cdr list1))))
    
    892
    +    res))
    
    893
    +
    
    891 894
     (defun nset-difference (list1 list2 &key key
    
    892 895
     			      (test #'eql testp) (test-not nil notp))
    
    893 896
       "Destructively returns the elements of list1 which are not in list2."
    
    ... ... @@ -989,14 +992,69 @@
    989 992
     	      (rplacd splicex (cdr x)))
    
    990 993
     	  (setq splicex x)))))
    
    991 994
     
    
    995
    +(declaim (start-block shorter-list-to-hashtable subsetp))
    
    996
    +
    
    997
    +(defun shorter-list-to-hashtable (list1 list2 key test test-not)
    
    998
    +  ;; Find the shorter list and return the length and the shorter list
    
    999
    +  (when test-not
    
    1000
    +    (return-from shorter-list-to-hashtable nil))
    
    1001
    +  (let ((hash-test (let ((test-fn (if (and (symbolp test)
    
    1002
    +                                           (fboundp test))
    
    1003
    +                                      (fdefinition test)
    
    1004
    +                                      test)))
    
    1005
    +                     (cond ((eql test-fn #'eq) 'eq)
    
    1006
    +                           ((eql test-fn #'eql) 'eql)
    
    1007
    +                           ((eql test-fn #'equal) 'equal)
    
    1008
    +                           ((eql test-fn #'equalp) 'equalp)))))
    
    1009
    +    (unless hash-test
    
    1010
    +      (return-from shorter-list-to-hashtable nil))
    
    1011
    +    (multiple-value-bind (min-length shorter-list)
    
    1012
    +        (do ((len 0 (1+ len))
    
    1013
    +             (lst1 list1 (cdr lst1))
    
    1014
    +             (lst2 list2 (cdr lst2)))
    
    1015
    +            ((or (null lst1) (null lst2))
    
    1016
    +             (values len (if (null lst1) list1 list2))))
    
    1017
    +      (when (< min-length kernel::*min-list-length-for-subsetp-hashtable*)
    
    1018
    +        (return-from shorter-list-to-hashtable nil))
    
    1019
    +      (let ((hashtable (make-hash-table :test hash-test :size min-length)))
    
    1020
    +        (dolist (item shorter-list)
    
    1021
    +          (setf (gethash (apply-key key item) hashtable) item))
    
    1022
    +        (values hashtable shorter-list)))))
    
    1023
    +        
    
    992 1024
     (defun subsetp (list1 list2 &key key (test #'eql testp) (test-not nil notp))
    
    993 1025
       "Returns T if every element in list1 is also in list2."
    
    994 1026
       (declare (inline member))
    
    995
    -  (dolist (elt list1)
    
    996
    -    (unless (with-set-keys (member (apply-key key elt) list2))
    
    997
    -      (return-from subsetp nil)))
    
    998
    -  T)
    
    999
    -
    
    1027
    +  (when (and testp notp)
    
    1028
    +    (error "Test and test-not both supplied."))
    
    1029
    +
    
    1030
    +  ;; SUBSETP is used early in TYPE-INIT where hash tables aren't
    
    1031
    +  ;; available yet, so we can't use hashtables then.  LISPINIT will
    
    1032
    +  ;; take care to disable this for the kernel.core.  SAVE will set
    
    1033
    +  ;; this to true when it's safe to use hash tables for SUBSETP.
    
    1034
    +  (multiple-value-bind (hashtable shorter-list)
    
    1035
    +      (when t
    
    1036
    +        (shorter-list-to-hashtable list1 list2 key test test-not))
    
    1037
    +    (cond (hashtable
    
    1038
    +           (cond ((eq shorter-list list1)
    
    1039
    +                  ;; Remove any item from list2 from the hashtable containing list1.
    
    1040
    +                  (dolist (item list2)
    
    1041
    +                    (remhash (apply-key key item) hashtable))
    
    1042
    +                  ;; If the hash table is now empty, then every
    
    1043
    +                  ;; element in list1 appeared in list2, so list1 is a
    
    1044
    +                  ;; subset of list2.
    
    1045
    +                  (zerop (hash-table-count hashtable)))
    
    1046
    +                 ((eq shorter-list list2)
    
    1047
    +                  (dolist (item list1)
    
    1048
    +                    (unless (nth-value 1 (gethash (apply-key key item) hashtable))
    
    1049
    +                      (return-from subsetp nil)))
    
    1050
    +                  t)))
    
    1051
    +	  (t
    
    1052
    +	   (dolist (item list1)
    
    1053
    +	     (unless (with-set-keys (member (apply-key key item) list2))
    
    1054
    +	       (return-from subsetp nil)))
    
    1055
    +	   T))))
    
    1056
    +
    
    1057
    +(declaim (end-block))
    
    1000 1058
     
    
    1001 1059
     
    
    1002 1060
     ;;; Functions that operate on association lists
    

  • src/code/type.lisp
    ... ... @@ -377,6 +377,14 @@
    377 377
     (cold-load-init (setq *use-implementation-types* t))
    
    378 378
     (declaim (type boolean *use-implementation-types*))
    
    379 379
     
    
    380
    +(defvar *min-list-length-for-subsetp-hashtable* 150
    
    381
    +  "The minimum length of either list argument for subsetp where a
    
    382
    +  hashtable is used to speed up processing instead of using a basic
    
    383
    +  list implementation.  This value was determined by experimentation.")
    
    384
    +
    
    385
    +(cold-load-init (setq *min-list-length-for-subsetp-hashtable* 150))
    
    386
    +(declaim (type fixnum *min-list-length-for-subsetp-hashtable*))
    
    387
    +
    
    380 388
     ;;; DELEGATE-COMPLEX-{SUBTYPEP-ARG2,INTERSECTION}  --  Interface
    
    381 389
     ;;;
    
    382 390
     ;;;    These functions are used as method for types which need a complex
    

  • src/i18n/locale/cmucl.pot
    ... ... @@ -1155,6 +1155,13 @@ msgid ""
    1155 1155
     "   affects array types."
    
    1156 1156
     msgstr ""
    
    1157 1157
     
    
    1158
    +#: src/code/type.lisp
    
    1159
    +msgid ""
    
    1160
    +"The minimum length of either list argument for subsetp where a\n"
    
    1161
    +"  hashtable is used to speed up processing instead of using a basic\n"
    
    1162
    +"  list implementation.  This value was determined by experimentation."
    
    1163
    +msgstr ""
    
    1164
    +
    
    1158 1165
     #: src/code/type.lisp
    
    1159 1166
     msgid "Subtypep is illegal on this type:~%  ~S"
    
    1160 1167
     msgstr ""
    
    ... ... @@ -3209,19 +3216,19 @@ msgid "Test and test-not both supplied."
    3209 3216
     msgstr ""
    
    3210 3217
     
    
    3211 3218
     #: src/code/list.lisp
    
    3212
    -msgid "Destructively returns the union list1 and list2."
    
    3219
    +msgid "Returns the intersection of list1 and list2."
    
    3213 3220
     msgstr ""
    
    3214 3221
     
    
    3215 3222
     #: src/code/list.lisp
    
    3216
    -msgid "Returns the intersection of list1 and list2."
    
    3223
    +msgid "Returns the elements of list1 which are not in list2."
    
    3217 3224
     msgstr ""
    
    3218 3225
     
    
    3219 3226
     #: src/code/list.lisp
    
    3220
    -msgid "Destructively returns the intersection of list1 and list2."
    
    3227
    +msgid "Destructively returns the union list1 and list2."
    
    3221 3228
     msgstr ""
    
    3222 3229
     
    
    3223 3230
     #: src/code/list.lisp
    
    3224
    -msgid "Returns the elements of list1 which are not in list2."
    
    3231
    +msgid "Destructively returns the intersection of list1 and list2."
    
    3225 3232
     msgstr ""
    
    3226 3233
     
    
    3227 3234
     #: src/code/list.lisp
    

  • tests/sets.lisp
    ... ... @@ -172,3 +172,54 @@
    172 172
     		       '(3 4)
    
    173 173
     		       :test 'eql
    
    174 174
     		       :test-not 'eql)))
    
    175
    +
    
    176
    +
    
    177
    +(define-test subsetp.hash-eq
    
    178
    +    (:tag :issues)
    
    179
    +  (let ((lisp::*min-list-length-for-hashtable* 2))
    
    180
    +    (assert-true
    
    181
    +     (subsetp '(a b c a) '(a a d d c b) :test 'eq))
    
    182
    +    (assert-true
    
    183
    +     (subsetp '(a b c a b c a b c) '(a a d d c b) :test 'eq))
    
    184
    +    (assert-false
    
    185
    +     (subsetp '(a b c a z) '(a a d d c b) :test 'eq))
    
    186
    +    (assert-false
    
    187
    +     (subsetp '(a b c a b cz) '(a a d d c b) :test 'eq))))
    
    188
    +
    
    189
    +(define-test subsetp.hash-eql
    
    190
    +    (:tag :issues)
    
    191
    +  (let ((lisp::*min-list-length-for-hashtable* 2))
    
    192
    +    (assert-true
    
    193
    +     (subsetp '(a b c a) '(a a d d c b) :test 'eql))
    
    194
    +    (assert-false
    
    195
    +     (subsetp '(a b c a z) '(a a d d c b) :test 'eql))))
    
    196
    +
    
    197
    +(define-test subsetp.hash-equal
    
    198
    +    (:tag :issues)
    
    199
    +  (let ((lisp::*min-list-length-for-hashtable* 2))
    
    200
    +    (assert-true
    
    201
    +     (subsetp '("a" "b" "c" "a") '("a" "a" "d" "d" "c" "b") :test 'equal))
    
    202
    +    (assert-false
    
    203
    +     (subsetp '("a" "b" "c" "a" "z") '("a" "a" "d" "d" "c" "b") :test 'equal))))
    
    204
    +
    
    205
    +(define-test subsetp.hash-equalp
    
    206
    +    (:tag :issues)
    
    207
    +  (let ((lisp::*min-list-length-for-hashtable* 2))
    
    208
    +    (assert-true
    
    209
    +     (subsetp '("a" "b" "C" "A") '("a" "a" "d" "d" "c" "b") :test 'equalp))
    
    210
    +    (assert-false
    
    211
    +     (subsetp '("a" "b" "C" "A" "z") '("a" "a" "d" "d" "c" "b") :test 'equalp))))
    
    212
    +
    
    213
    +(define-test subsetp.hash-eql-with-key
    
    214
    +    (:tag :issues)
    
    215
    +  (assert-true (subsetp '((1 "a") (2 "b") (3 "c"))
    
    216
    +                        '((3 "c") (3 "c") (2 "b") (1 "a"))
    
    217
    +                        :test 'eql
    
    218
    +                        :key #'first)))
    
    219
    +
    
    220
    +(define-test subsetp.test-and-test-not
    
    221
    +  (assert-error 'simple-error
    
    222
    +                (subsetp '(1 2)
    
    223
    +                         '(3 4)
    
    224
    +                         :test 'eql
    
    225
    +                         :test-not 'equal)))