Raymond Toy pushed to branch issue-240-set-diff-with-hash-table at cmucl / cmucl
Commits: ac30347e by Raymond Toy at 2023-06-19T13:45:44-07:00 Prefer list2 when both lists have the same length
Also add some comments.
- - - - - 57f0451c by Raymond Toy at 2023-06-19T13:48:29-07:00 Allow quick exit in set-difference
Also, now that set-difference is pretty large, don't declare it as maybe-inline.
- - - - -
1 changed file:
- src/code/list.lisp
Changes:
===================================== src/code/list.lisp ===================================== @@ -45,7 +45,7 @@ tree-equal list-length nth %setnth nthcdr last make-list append copy-list copy-alist copy-tree revappend nconc nreconc butlast nbutlast ldiff member member-if member-if-not tailp adjoin union - nunion intersection nintersection set-difference nset-difference + nunion intersection nintersection nset-difference set-exclusive-or nset-exclusive-or subsetp acons pairlis assoc assoc-if assoc-if-not rassoc rassoc-if rassoc-if-not subst subst-if subst-if-not nsubst nsubst-if nsubst-if-not sublis nsublis)) @@ -744,11 +744,13 @@ list (cons item list)))
+;; The minimum length of a list before we can use a hashtable (defparameter *min-list-length-for-hashtable* 15)
;; Convert a list to a hashtable. Given 2 lists, find the shorter of -;; the two lists and add the shorter list to a hashtable. +;; the two lists and add the shorter list to a hashtable. Returns the +;; hashtable and the shorter list. (defun list-to-hashtable (list1 list2 &key test test-not key) ;; Don't currently support test-not when converting a list to a hashtable (unless test-not @@ -763,13 +765,18 @@ (unless hash-test (return-from list-to-hashtable (values nil nil))) (multiple-value-bind (len shorter-list) + ;; Find the list with the shorter length. If they're they + ;; same, we prefer the second list to the first list since + ;; the hashtable implementation is slightly simplier. (do ((length 0 (1+ length)) (l1 list1 (cdr l1)) (l2 list2 (cdr l2))) - ((cond ((null l1) - (return (values length list1))) - ((null l2) - (return (values length list2)))))) + ((cond ((null l2) + (return (values length list2))) + ((null l1) + (return (values length list1)))))) + ;; If the list is too short, the hashtable makes things + ;; slower. We also need to balance memory usage. (when (< len *min-list-length-for-hashtable*) (return-from list-to-hashtable (values nil nil))) (cond ((eq shorter-list list2) @@ -855,18 +862,20 @@ (declare (inline member)) (if (and testp notp) (error "Test and test-not both supplied.")) + ;; Quick exit + (when (null list) + (return-from set-difference list1)) + (multiple-value-bind (hashtable shorter-list) (list-to-hashtable list1 list2 :key key :test test :test-not test-not) (cond ((null hashtable) ;; Default implementation because we didn't create the hash ;; table. - (if (null list2) - list1 - (let ((res nil)) - (dolist (elt list1) - (if (not (with-set-keys (member (apply-key key elt) list2))) - (push elt res))) - res))) + (let ((res nil)) + (dolist (elt list1) + (if (not (with-set-keys (member (apply-key key elt) list2))) + (push elt res))) + res)) ((eq shorter-list list2) ;; list2 was placed in hash table. (let (diff)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/653dc42ff6285a475709fe2...