Raymond Toy pushed to branch issue-240-subsetp-with-hash-table at cmucl / cmucl
Commits: 55c01f44 by Raymond Toy at 2023-08-17T13:33:59+00:00 Address #240: Speed up intersection by using a hashtable
- - - - - 14d847f0 by Raymond Toy at 2023-08-17T13:34:15+00:00 Merge branch 'issue-240-intersection-with-hash-table' into 'master'
Address #240: Speed up intersection by using a hashtable
Closes #240
See merge request cmucl/cmucl!160 - - - - - c9ce7574 by Raymond Toy at 2023-08-17T13:36:18+00:00 Address #240: Speed up union by using a hashtable
- - - - - 5c7536f0 by Raymond Toy at 2023-08-17T13:36:44+00:00 Merge branch 'issue-240-union-with-hash-table' into 'master'
Address #240: Speed up union by using a hashtable
Closes #240
See merge request cmucl/cmucl!159 - - - - - 181508a9 by Raymond Toy at 2023-08-17T06:47:03-07:00 Remove old version of union
Oops. Forgot to remove this in !159, so we do it now.
- - - - - 611a0377 by Raymond Toy at 2023-08-17T15:27:54-07:00 Merge branch 'master' into issue-240-subsetp-with-hash-table
- - - - -
2 changed files:
- src/code/list.lisp - tests/sets.lisp
Changes:
===================================== src/code/list.lisp ===================================== @@ -788,11 +788,18 @@ (defun union (list1 list2 &key key (test #'eql testp) (test-not nil notp)) "Returns the union of list1 and list2." (declare (inline member)) - (when (and testp notp) (error (intl:gettext "Test and test-not both supplied."))) - (let ((res list2)) - (dolist (elt list1) - (unless (with-set-keys (member (apply-key key elt) list2)) - (push elt res))) + (when (and testp notp) + (error (intl:gettext "Test and test-not both supplied."))) + (let ((res list2) + (hashtable (list-to-hashtable list2 key test test-not))) + (cond (hashtable + (dolist (item list1) + (unless (nth-value 1 (gethash (apply-key key item) hashtable)) + (push item res)))) + ((null hashtable) + (dolist (item list1) + (unless (with-set-keys (member (apply-key key item) list2)) + (push item res))))) res))
;;; Destination and source are setf-able and many-evaluable. Sets the source @@ -825,11 +832,20 @@ (declare (inline member)) (if (and testp notp) (error "Test and test-not both supplied.")) - (let ((res nil)) - (dolist (elt list1) - (if (with-set-keys (member (apply-key key elt) list2)) - (push elt res))) - res)) + (let ((hashtable + (list-to-hashtable list2 key test test-not))) + (cond (hashtable + (let ((res nil)) + (dolist (item list1) + (when (nth-value 1 (gethash (apply-key key item) hashtable)) + (push item res))) + res)) + ((null hashtable) + (let ((res nil)) + (dolist (elt list1) + (if (with-set-keys (member (apply-key key elt) list2)) + (push elt res))) + res)))))
(defun nintersection (list1 list2 &key key (test #'eql testp) (test-not nil notp))
===================================== tests/sets.lisp ===================================== @@ -89,3 +89,86 @@ :test-not 'eql)))
+ +(define-test union.hash-eql + (:tag :issues) + ;; For union to use hashtables by making the threshold + ;; small. + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '(2 2 1 3 4) + (union '(1 2 2 3) '(3 4))) + (assert-equal '(2 2 1 3 4 5 6 7 8) + (union '(1 2 2 3) '(3 4 5 6 7 8))) + (assert-equal '(2 2 1 3 4) + (union '(1 2 2 3) '(3 4) + :test #'eql)) + (assert-equal '(2 2 1 3 4 5 6 7 8) + (union '(1 2 2 3) '(3 4 5 6 7 8) + :test #'eql)))) + +(define-test union.hash-eq + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '(b b a c d e) + (union '(a b b c) '(c d e) :test 'eq)) + (assert-equal '(b b a c d e f g h) + (union '(a b b c) '(c d e f g h) :test 'eq)) + (assert-equal '(b b a c d e) + (union '(a b b c) '(c d e) :test #'eq)) + (assert-equal '(b b a c d e f g h) + (union '(a b b c) '(c d e f g h) :test #'eq)))) + +(define-test union.hash-equal + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '("b" "b" "a" "c" "d" "e") + (union '("a" "b" "b" "c") + '("c" "d" "e") + :test 'equal)) + (assert-equal '("b" "b" "a" "c" "d" "e" "f" "g" "h") + (union '("a" "b" "b" "c") + '("c" "d" "e" "f" "g" "h") + :test 'equal)) + (assert-equal '("b" "b" "a" "c" "d" "e") + (union '("a" "b" "b" "c") + '("c" "d" "e") + :test #'equal)) + (assert-equal '("b" "b" "a" "c" "d" "e" "f" "g" "h") + (union '("a" "b" "b" "c") + '("c" "d" "e" "f" "g" "h") + :test #'equal)))) + +(define-test union.hash-equalp + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '("b" "b" "a" "C" "d" "e") + (union '("a" "b" "b" "c") + '("C" "d" "e") + :test 'equalp)) + (assert-equal '("b" "b" "a" "c" "D" "e" "f" "g" "h") + (union '("a" "b" "b" "C") + '("c" "D" "e" "f" "g" "h") + :test 'equalp)) + (assert-equal '("b" "b" "a" "C" "d" "e") + (union '("a" "b" "b" "c") + '("C" "d" "e") + :test #'equalp)) + (assert-equal '("b" "b" "a" "c" "D" "e" "f" "g" "h") + (union '("a" "b" "b" "C") + '("c" "D" "e" "f" "g" "h") + :test #'equalp)))) + +;; Simple test that we handle a key correctly +(define-test union.hash-eql-with-key + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '((3 "b") (2 "b") (1 "a") (4 "c") (5 "d")) + (union '((1 "a") (2 "b") (3 "b")) + '((1 "a") (4 "c") (5 "d")) + :key #'first)))) + +(define-test union.test-and-test-not + (assert-error 'simple-error + (union '(1 2) + '(3 4) + :test 'eql + :test-not 'eql)))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/a4c90a8bfb0469e1ab17da0...