Raymond Toy pushed to branch master at cmucl / cmucl
Commits: 19a305de by Raymond Toy at 2023-08-16T14:28:11+00:00 Address #240: Speed up set-difference
- - - - - 9d593e3a by Raymond Toy at 2023-08-16T14:28:55+00:00 Merge branch 'issue-240-set-diff-with-hash-table' into 'master'
Address #240: Speed up set-difference
Closes #240
See merge request cmucl/cmucl!153 - - - - -
2 changed files:
- src/code/list.lisp - + tests/sets.lisp
Changes:
===================================== src/code/list.lisp ===================================== @@ -45,7 +45,7 @@ tree-equal list-length nth %setnth nthcdr last make-list append copy-list copy-alist copy-tree revappend nconc nreconc butlast nbutlast ldiff member member-if member-if-not tailp adjoin union - nunion intersection nintersection set-difference nset-difference + nunion intersection nintersection nset-difference set-exclusive-or nset-exclusive-or subsetp acons pairlis assoc assoc-if assoc-if-not rassoc rassoc-if rassoc-if-not subst subst-if subst-if-not nsubst nsubst-if nsubst-if-not sublis nsublis)) @@ -744,6 +744,39 @@ list (cons item list)))
+;; The minimum length of a list before we can use a hashtable. This +;; was determined experimentally. +(defparameter *min-list-length-for-hashtable* + 15) + +;; Convert a list to a hashtable. The hashtable does not handle +;; duplicated values in the list. Returns the hashtable. +(defun list-to-hashtable (list key test test-not) + ;; Don't currently support test-not when converting a list to a hashtable + (unless test-not + (let ((hash-test (let ((test-fn (if (and (symbolp test) + (fboundp test)) + (fdefinition test) + test))) + (cond ((eql test-fn #'eq) 'eq) + ((eql test-fn #'eql) 'eql) + ((eql test-fn #'equal) 'equal) + ((eql test-fn #'equalp) 'equalp))))) + (unless hash-test + (return-from list-to-hashtable nil)) + ;; If the list is too short, the hashtable makes things + ;; slower. We also need to balance memory usage. + (let ((len 0)) + ;; Compute list length ourselves. + (dolist (item list) + (declare (ignore item)) + (incf len)) + (when (< len *min-list-length-for-hashtable*) + (return-from list-to-hashtable nil)) + (let ((hashtable (make-hash-table :test hash-test :size len))) + (dolist (item list) + (setf (gethash (apply-key key item) hashtable) item)) + hashtable)))))
;;; UNION -- Public. ;;; @@ -812,20 +845,32 @@ (setq list1 (Cdr list1)))) res))
-(defun set-difference (list1 list2 &key key - (test #'eql testp) (test-not nil notp)) +(defun set-difference (list1 list2 &key key (test #'eql testp) (test-not nil notp)) "Returns the elements of list1 which are not in list2." (declare (inline member)) (if (and testp notp) (error "Test and test-not both supplied.")) - (if (null list2) - list1 - (let ((res nil)) - (dolist (elt list1) - (if (not (with-set-keys (member (apply-key key elt) list2))) - (push elt res))) - res))) - + ;; Quick exit + (when (null list2) + (return-from set-difference list1)) + + (let ((hashtable + (list-to-hashtable list2 key test test-not))) + (cond (hashtable + ;; list2 was placed in hash table. + (let ((res nil)) + (dolist (item list1) + (unless (nth-value 1 (gethash (apply-key key item) hashtable)) + (push item res))) + res)) + ((null hashtable) + ;; Default implementation because we didn't create the hash + ;; table. + (let ((res nil)) + (dolist (item list1) + (if (not (with-set-keys (member (apply-key key item) list2))) + (push item res))) + res)))))
(defun nset-difference (list1 list2 &key key (test #'eql testp) (test-not nil notp))
===================================== tests/sets.lisp ===================================== @@ -0,0 +1,91 @@ +;; Tests for sets + +(defpackage :sets-tests + (:use :cl :lisp-unit)) + +(in-package "SETS-TESTS") + +(define-test set-diff.hash-eql + (:tag :issues) + ;; For set-difference to use hashtables by making the threshold + ;; small. + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '(2 2 1) + (set-difference '(1 2 2 3) '(3 4))) + (assert-equal '(2 2 1) + (set-difference '(1 2 2 3) '(3 4 5 6 7 8))) + (assert-equal '(2 2 1) + (set-difference '(1 2 2 3) '(3 4) + :test #'eql)) + (assert-equal '(2 2 1) + (set-difference '(1 2 2 3) '(3 4 5 6 7 8) + :test #'eql)))) + +(define-test set-diff.hash-eq + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '(b b a) + (set-difference '(a b b c) '(c d e) :test 'eq)) + (assert-equal '(b b a) + (set-difference '(a b b c) '(c d e f g h) :test 'eq)) + (assert-equal '(b b a) + (set-difference '(a b b c) '(c d e) :test #'eq)) + (assert-equal '(b b a) + (set-difference '(a b b c) '(c d e f g h) :test #'eq)))) + +(define-test set-diff.hash-equal + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '("b" "b" "a") + (set-difference '("a" "b" "b" "c") + '("c" "d" "e") + :test 'equal)) + (assert-equal '("b" "b" "a") + (set-difference '("a" "b" "b" "c") + '("c" "d" "e" "f" "g" "h") + :test 'equal)) + (assert-equal '("b" "b" "a") + (set-difference '("a" "b" "b" "c") + '("c" "d" "e") + :test #'equal)) + (assert-equal '("b" "b" "a") + (set-difference '("a" "b" "b" "c") + '("c" "d" "e" "f" "g" "h") + :test #'equal)))) + +(define-test set-diff.hash-equalp + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '("b" "b" "a") + (set-difference '("a" "b" "b" "c") + '("C" "d" "e") + :test 'equalp)) + (assert-equal '("b" "b" "a") + (set-difference '("a" "b" "b" "C") + '("c" "D" "e" "f" "g" "h") + :test 'equalp)) + (assert-equal '("b" "b" "a") + (set-difference '("a" "b" "b" "c") + '("C" "d" "e") + :test #'equalp)) + (assert-equal '("b" "b" "a") + (set-difference '("a" "b" "b" "C") + '("c" "D" "e" "f" "g" "h") + :test #'equalp)))) + +;; Simple test that we handle a key correctly +(define-test set-diff.hash-eql-with-key + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-equal '((3 "b") (2 "b")) + (set-difference '((1 "a") (2 "b") (3 "b")) + '((1 "a") (4 "c") (5 "d")) + :key #'first)))) + +(define-test set-diff.test-and-test-not + (assert-error 'simple-error + (set-difference '(1 2) + '(3 4) + :test 'eql + :test-not 'eql))) + +
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/310e41eb1e70422bb637863...