Raymond Toy pushed to branch issue-240-clean-up-hashtable-impl at cmucl / cmucl
Commits: 0b497d8c by Raymond Toy at 2023-08-21T20:15:07+00:00 Address #240: Speed up subsetp with a hashtable
- - - - - bfae1626 by Raymond Toy at 2023-08-21T20:15:21+00:00 Merge branch 'issue-240-subsetp-with-hash-table' into 'master'
Address #240: Speed up subsetp with a hashtable
Closes #240
See merge request cmucl/cmucl!164 - - - - - afaeb420 by Raymond Toy at 2023-08-22T14:00:25+00:00 Fix #253: Block-compile list-to-hashtable and callers
- - - - - a5b2c0f8 by Raymond Toy at 2023-08-22T14:02:24+00:00 Merge branch 'issue-253-block-compile-list-to-hashtable' into 'master'
Fix #253: Block-compile list-to-hashtable and callers
Closes #253
See merge request cmucl/cmucl!166 - - - - - 97b6cca7 by Raymond Toy at 2023-08-22T07:07:49-07:00 Merge branch 'master' into issue-240-clean-up-hashtable-impl
- - - - -
4 changed files:
- src/code/list.lisp - src/code/type.lisp - src/i18n/locale/cmucl.pot - tests/sets.lisp
Changes:
===================================== src/code/list.lisp ===================================== @@ -749,6 +749,7 @@ (defparameter *min-list-length-for-hashtable* 15)
+(declaim (start-block list-to-hashtable union intersection set-difference)) ;; Convert a list to a hashtable. The hashtable does not handle ;; duplicated values in the list. Returns the hashtable. (defun list-to-hashtable (list key test test-not) @@ -803,29 +804,6 @@ (process (with-set-keys (member (apply-key key item) list2))))) res)))
-;;; Destination and source are setf-able and many-evaluable. Sets the source -;;; to the cdr, and "conses" the 1st elt of source to destination. -;;; -(defmacro steve-splice (source destination) - `(let ((temp ,source)) - (setf ,source (cdr ,source) - (cdr temp) ,destination - ,destination temp))) - -(defun nunion (list1 list2 &key key (test #'eql testp) (test-not nil notp)) - "Destructively returns the union list1 and list2." - (declare (inline member)) - (if (and testp notp) - (error "Test and test-not both supplied.")) - (let ((res list2) - (list1 list1)) - (do () - ((endp list1)) - (if (not (with-set-keys (member (apply-key key (car list1)) list2))) - (steve-splice list1 res) - (setf list1 (cdr list1)))) - res)) -
(defun intersection (list1 list2 &key key (test #'eql testp) (test-not nil notp)) @@ -847,20 +825,6 @@ (t (process (with-set-keys (member (apply-key key item) list2))))))))
-(defun nintersection (list1 list2 &key key - (test #'eql testp) (test-not nil notp)) - "Destructively returns the intersection of list1 and list2." - (declare (inline member)) - (if (and testp notp) - (error "Test and test-not both supplied.")) - (let ((res nil) - (list1 list1)) - (do () ((endp list1)) - (if (with-set-keys (member (apply-key key (car list1)) list2)) - (steve-splice list1 res) - (setq list1 (Cdr list1)))) - res)) - (defun set-difference (list1 list2 &key key (test #'eql testp) (test-not nil notp)) "Returns the elements of list1 which are not in list2." (declare (inline member)) @@ -888,6 +852,45 @@ (process (with-set-keys (member (apply-key key item) list2))))))))
+(declaim (end-block)) + +;;; Destination and source are setf-able and many-evaluable. Sets the source +;;; to the cdr, and "conses" the 1st elt of source to destination. +;;; +(defmacro steve-splice (source destination) + `(let ((temp ,source)) + (setf ,source (cdr ,source) + (cdr temp) ,destination + ,destination temp))) + +(defun nunion (list1 list2 &key key (test #'eql testp) (test-not nil notp)) + "Destructively returns the union list1 and list2." + (declare (inline member)) + (if (and testp notp) + (error "Test and test-not both supplied.")) + (let ((res list2) + (list1 list1)) + (do () + ((endp list1)) + (if (not (with-set-keys (member (apply-key key (car list1)) list2))) + (steve-splice list1 res) + (setf list1 (cdr list1)))) + res)) + +(defun nintersection (list1 list2 &key key + (test #'eql testp) (test-not nil notp)) + "Destructively returns the intersection of list1 and list2." + (declare (inline member)) + (if (and testp notp) + (error "Test and test-not both supplied.")) + (let ((res nil) + (list1 list1)) + (do () ((endp list1)) + (if (with-set-keys (member (apply-key key (car list1)) list2)) + (steve-splice list1 res) + (setq list1 (Cdr list1)))) + res)) + (defun nset-difference (list1 list2 &key key (test #'eql testp) (test-not nil notp)) "Destructively returns the elements of list1 which are not in list2." @@ -989,14 +992,69 @@ (rplacd splicex (cdr x))) (setq splicex x)))))
+(declaim (start-block shorter-list-to-hashtable subsetp)) + +(defun shorter-list-to-hashtable (list1 list2 key test test-not) + ;; Find the shorter list and return the length and the shorter list + (when test-not + (return-from shorter-list-to-hashtable nil)) + (let ((hash-test (let ((test-fn (if (and (symbolp test) + (fboundp test)) + (fdefinition test) + test))) + (cond ((eql test-fn #'eq) 'eq) + ((eql test-fn #'eql) 'eql) + ((eql test-fn #'equal) 'equal) + ((eql test-fn #'equalp) 'equalp))))) + (unless hash-test + (return-from shorter-list-to-hashtable nil)) + (multiple-value-bind (min-length shorter-list) + (do ((len 0 (1+ len)) + (lst1 list1 (cdr lst1)) + (lst2 list2 (cdr lst2))) + ((or (null lst1) (null lst2)) + (values len (if (null lst1) list1 list2)))) + (when (< min-length kernel::*min-list-length-for-subsetp-hashtable*) + (return-from shorter-list-to-hashtable nil)) + (let ((hashtable (make-hash-table :test hash-test :size min-length))) + (dolist (item shorter-list) + (setf (gethash (apply-key key item) hashtable) item)) + (values hashtable shorter-list))))) + (defun subsetp (list1 list2 &key key (test #'eql testp) (test-not nil notp)) "Returns T if every element in list1 is also in list2." (declare (inline member)) - (dolist (elt list1) - (unless (with-set-keys (member (apply-key key elt) list2)) - (return-from subsetp nil))) - T) - + (when (and testp notp) + (error "Test and test-not both supplied.")) + + ;; SUBSETP is used early in TYPE-INIT where hash tables aren't + ;; available yet, so we can't use hashtables then. LISPINIT will + ;; take care to disable this for the kernel.core. SAVE will set + ;; this to true when it's safe to use hash tables for SUBSETP. + (multiple-value-bind (hashtable shorter-list) + (when t + (shorter-list-to-hashtable list1 list2 key test test-not)) + (cond (hashtable + (cond ((eq shorter-list list1) + ;; Remove any item from list2 from the hashtable containing list1. + (dolist (item list2) + (remhash (apply-key key item) hashtable)) + ;; If the hash table is now empty, then every + ;; element in list1 appeared in list2, so list1 is a + ;; subset of list2. + (zerop (hash-table-count hashtable))) + ((eq shorter-list list2) + (dolist (item list1) + (unless (nth-value 1 (gethash (apply-key key item) hashtable)) + (return-from subsetp nil))) + t))) + (t + (dolist (item list1) + (unless (with-set-keys (member (apply-key key item) list2)) + (return-from subsetp nil))) + T)))) + +(declaim (end-block))
;;; Functions that operate on association lists
===================================== src/code/type.lisp ===================================== @@ -377,6 +377,14 @@ (cold-load-init (setq *use-implementation-types* t)) (declaim (type boolean *use-implementation-types*))
+(defvar *min-list-length-for-subsetp-hashtable* 150 + "The minimum length of either list argument for subsetp where a + hashtable is used to speed up processing instead of using a basic + list implementation. This value was determined by experimentation.") + +(cold-load-init (setq *min-list-length-for-subsetp-hashtable* 150)) +(declaim (type fixnum *min-list-length-for-subsetp-hashtable*)) + ;;; DELEGATE-COMPLEX-{SUBTYPEP-ARG2,INTERSECTION} -- Interface ;;; ;;; These functions are used as method for types which need a complex
===================================== src/i18n/locale/cmucl.pot ===================================== @@ -1155,6 +1155,13 @@ msgid "" " affects array types." msgstr ""
+#: src/code/type.lisp +msgid "" +"The minimum length of either list argument for subsetp where a\n" +" hashtable is used to speed up processing instead of using a basic\n" +" list implementation. This value was determined by experimentation." +msgstr "" + #: src/code/type.lisp msgid "Subtypep is illegal on this type:~% ~S" msgstr "" @@ -3209,19 +3216,19 @@ msgid "Test and test-not both supplied." msgstr ""
#: src/code/list.lisp -msgid "Destructively returns the union list1 and list2." +msgid "Returns the intersection of list1 and list2." msgstr ""
#: src/code/list.lisp -msgid "Returns the intersection of list1 and list2." +msgid "Returns the elements of list1 which are not in list2." msgstr ""
#: src/code/list.lisp -msgid "Destructively returns the intersection of list1 and list2." +msgid "Destructively returns the union list1 and list2." msgstr ""
#: src/code/list.lisp -msgid "Returns the elements of list1 which are not in list2." +msgid "Destructively returns the intersection of list1 and list2." msgstr ""
#: src/code/list.lisp
===================================== tests/sets.lisp ===================================== @@ -172,3 +172,54 @@ '(3 4) :test 'eql :test-not 'eql))) + + +(define-test subsetp.hash-eq + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-true + (subsetp '(a b c a) '(a a d d c b) :test 'eq)) + (assert-true + (subsetp '(a b c a b c a b c) '(a a d d c b) :test 'eq)) + (assert-false + (subsetp '(a b c a z) '(a a d d c b) :test 'eq)) + (assert-false + (subsetp '(a b c a b cz) '(a a d d c b) :test 'eq)))) + +(define-test subsetp.hash-eql + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-true + (subsetp '(a b c a) '(a a d d c b) :test 'eql)) + (assert-false + (subsetp '(a b c a z) '(a a d d c b) :test 'eql)))) + +(define-test subsetp.hash-equal + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-true + (subsetp '("a" "b" "c" "a") '("a" "a" "d" "d" "c" "b") :test 'equal)) + (assert-false + (subsetp '("a" "b" "c" "a" "z") '("a" "a" "d" "d" "c" "b") :test 'equal)))) + +(define-test subsetp.hash-equalp + (:tag :issues) + (let ((lisp::*min-list-length-for-hashtable* 2)) + (assert-true + (subsetp '("a" "b" "C" "A") '("a" "a" "d" "d" "c" "b") :test 'equalp)) + (assert-false + (subsetp '("a" "b" "C" "A" "z") '("a" "a" "d" "d" "c" "b") :test 'equalp)))) + +(define-test subsetp.hash-eql-with-key + (:tag :issues) + (assert-true (subsetp '((1 "a") (2 "b") (3 "c")) + '((3 "c") (3 "c") (2 "b") (1 "a")) + :test 'eql + :key #'first))) + +(define-test subsetp.test-and-test-not + (assert-error 'simple-error + (subsetp '(1 2) + '(3 4) + :test 'eql + :test-not 'equal)))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/ce2eadfcb1f890cd1c96b6a...