Raymond Toy pushed to branch master at cmucl / cmucl
Commits: c9ce7574 by Raymond Toy at 2023-08-17T13:36:18+00:00 Address #240: Speed up union by using a hashtable
- - - - - 5c7536f0 by Raymond Toy at 2023-08-17T13:36:44+00:00 Merge branch 'issue-240-union-with-hash-table' into 'master'
Address #240: Speed up union by using a hashtable
Closes #240
See merge request cmucl/cmucl!159 - - - - -
2 changed files:
- src/code/list.lisp - tests/sets.lisp
Changes:
===================================== src/code/list.lisp ===================================== @@ -785,6 +785,7 @@ ;;; will apply the test to the elements from list1 and list2 in the correct ;;; order. ;;; +#+nil (defun union (list1 list2 &key key (test #'eql testp) (test-not nil notp)) "Returns the union of list1 and list2." (declare (inline member)) @@ -795,6 +796,23 @@ (push elt res))) res))
+(defun union (list1 list2 &key key (test #'eql testp) (test-not nil notp)) + "Returns the union of list1 and list2." + (declare (inline member)) + (when (and testp notp) + (error (intl:gettext "Test and test-not both supplied."))) + (let ((res list2) + (hashtable (list-to-hashtable list2 key test test-not))) + (cond (hashtable + (dolist (item list1) + (unless (nth-value 1 (gethash (apply-key key item) hashtable)) + (push item res)))) + ((null hashtable) + (dolist (item list1) + (unless (with-set-keys (member (apply-key key item) list2)) + (push item res))))) + res)) + ;;; Destination and source are setf-able and many-evaluable. Sets the source ;;; to the cdr, and "conses" the 1st elt of source to destination. ;;;
===================================== tests/sets.lisp ===================================== @@ -89,3 +89,86 @@ :test-not 'eql)))
+ +(define-test union.hash-eql + (:tag :issues) + ;; For union to use hashtables by making the threshold + ;; small. + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '(2 2 1 3 4) + (union '(1 2 2 3) '(3 4))) + (assert-equal '(2 2 1 3 4 5 6 7 8) + (union '(1 2 2 3) '(3 4 5 6 7 8))) + (assert-equal '(2 2 1 3 4) + (union '(1 2 2 3) '(3 4) + :test #'eql)) + (assert-equal '(2 2 1 3 4 5 6 7 8) + (union '(1 2 2 3) '(3 4 5 6 7 8) + :test #'eql)))) + +(define-test union.hash-eq + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '(b b a c d e) + (union '(a b b c) '(c d e) :test 'eq)) + (assert-equal '(b b a c d e f g h) + (union '(a b b c) '(c d e f g h) :test 'eq)) + (assert-equal '(b b a c d e) + (union '(a b b c) '(c d e) :test #'eq)) + (assert-equal '(b b a c d e f g h) + (union '(a b b c) '(c d e f g h) :test #'eq)))) + +(define-test union.hash-equal + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '("b" "b" "a" "c" "d" "e") + (union '("a" "b" "b" "c") + '("c" "d" "e") + :test 'equal)) + (assert-equal '("b" "b" "a" "c" "d" "e" "f" "g" "h") + (union '("a" "b" "b" "c") + '("c" "d" "e" "f" "g" "h") + :test 'equal)) + (assert-equal '("b" "b" "a" "c" "d" "e") + (union '("a" "b" "b" "c") + '("c" "d" "e") + :test #'equal)) + (assert-equal '("b" "b" "a" "c" "d" "e" "f" "g" "h") + (union '("a" "b" "b" "c") + '("c" "d" "e" "f" "g" "h") + :test #'equal)))) + +(define-test union.hash-equalp + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '("b" "b" "a" "C" "d" "e") + (union '("a" "b" "b" "c") + '("C" "d" "e") + :test 'equalp)) + (assert-equal '("b" "b" "a" "c" "D" "e" "f" "g" "h") + (union '("a" "b" "b" "C") + '("c" "D" "e" "f" "g" "h") + :test 'equalp)) + (assert-equal '("b" "b" "a" "C" "d" "e") + (union '("a" "b" "b" "c") + '("C" "d" "e") + :test #'equalp)) + (assert-equal '("b" "b" "a" "c" "D" "e" "f" "g" "h") + (union '("a" "b" "b" "C") + '("c" "D" "e" "f" "g" "h") + :test #'equalp)))) + +;; Simple test that we handle a key correctly +(define-test union.hash-eql-with-key + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '((3 "b") (2 "b") (1 "a") (4 "c") (5 "d")) + (union '((1 "a") (2 "b") (3 "b")) + '((1 "a") (4 "c") (5 "d")) + :key #'first)))) + +(define-test union.test-and-test-not + (assert-error 'simple-error + (union '(1 2) + '(3 4) + :test 'eql + :test-not 'eql)))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/14d847f0523f21a3d3db82f...