Raymond Toy pushed to branch issue-240-add-hashtable-for-destructive-set-ops at cmucl / cmucl
Commits: 0b497d8c by Raymond Toy at 2023-08-21T20:15:07+00:00 Address #240: Speed up subsetp with a hashtable
- - - - - bfae1626 by Raymond Toy at 2023-08-21T20:15:21+00:00 Merge branch 'issue-240-subsetp-with-hash-table' into 'master'
Address #240: Speed up subsetp with a hashtable
Closes #240
See merge request cmucl/cmucl!164 - - - - - 84d77f51 by Raymond Toy at 2023-08-21T15:54:30-07:00 Merge branch 'master' into issue-240-add-hashtable-for-destructive-set-ops
- - - - -
4 changed files:
- src/code/list.lisp - src/code/type.lisp - src/i18n/locale/cmucl.pot - tests/sets.lisp
Changes:
===================================== src/code/list.lisp ===================================== @@ -1022,14 +1022,69 @@ (rplacd splicex (cdr x))) (setq splicex x)))))
+(declaim (start-block shorter-list-to-hashtable subsetp)) + +(defun shorter-list-to-hashtable (list1 list2 key test test-not) + ;; Find the shorter list and return the length and the shorter list + (when test-not + (return-from shorter-list-to-hashtable nil)) + (let ((hash-test (let ((test-fn (if (and (symbolp test) + (fboundp test)) + (fdefinition test) + test))) + (cond ((eql test-fn #'eq) 'eq) + ((eql test-fn #'eql) 'eql) + ((eql test-fn #'equal) 'equal) + ((eql test-fn #'equalp) 'equalp))))) + (unless hash-test + (return-from shorter-list-to-hashtable nil)) + (multiple-value-bind (min-length shorter-list) + (do ((len 0 (1+ len)) + (lst1 list1 (cdr lst1)) + (lst2 list2 (cdr lst2))) + ((or (null lst1) (null lst2)) + (values len (if (null lst1) list1 list2)))) + (when (< min-length kernel::*min-list-length-for-subsetp-hashtable*) + (return-from shorter-list-to-hashtable nil)) + (let ((hashtable (make-hash-table :test hash-test :size min-length))) + (dolist (item shorter-list) + (setf (gethash (apply-key key item) hashtable) item)) + (values hashtable shorter-list))))) + (defun subsetp (list1 list2 &key key (test #'eql testp) (test-not nil notp)) "Returns T if every element in list1 is also in list2." (declare (inline member)) - (dolist (elt list1) - (unless (with-set-keys (member (apply-key key elt) list2)) - (return-from subsetp nil))) - T) + (when (and testp notp) + (error "Test and test-not both supplied.")) + + ;; SUBSETP is used early in TYPE-INIT where hash tables aren't + ;; available yet, so we can't use hashtables then. LISPINIT will + ;; take care to disable this for the kernel.core. SAVE will set + ;; this to true when it's safe to use hash tables for SUBSETP. + (multiple-value-bind (hashtable shorter-list) + (when t + (shorter-list-to-hashtable list1 list2 key test test-not)) + (cond (hashtable + (cond ((eq shorter-list list1) + ;; Remove any item from list2 from the hashtable containing list1. + (dolist (item list2) + (remhash (apply-key key item) hashtable)) + ;; If the hash table is now empty, then every + ;; element in list1 appeared in list2, so list1 is a + ;; subset of list2. + (zerop (hash-table-count hashtable))) + ((eq shorter-list list2) + (dolist (item list1) + (unless (nth-value 1 (gethash (apply-key key item) hashtable)) + (return-from subsetp nil))) + t))) + (t + (dolist (item list1) + (unless (with-set-keys (member (apply-key key item) list2)) + (return-from subsetp nil))) + T))))
+(declaim (end-block))
;;; Functions that operate on association lists
===================================== src/code/type.lisp ===================================== @@ -377,6 +377,14 @@ (cold-load-init (setq *use-implementation-types* t)) (declaim (type boolean *use-implementation-types*))
+(defvar *min-list-length-for-subsetp-hashtable* 150 + "The minimum length of either list argument for subsetp where a + hashtable is used to speed up processing instead of using a basic + list implementation. This value was determined by experimentation.") + +(cold-load-init (setq *min-list-length-for-subsetp-hashtable* 150)) +(declaim (type fixnum *min-list-length-for-subsetp-hashtable*)) + ;;; DELEGATE-COMPLEX-{SUBTYPEP-ARG2,INTERSECTION} -- Interface ;;; ;;; These functions are used as method for types which need a complex
===================================== src/i18n/locale/cmucl.pot ===================================== @@ -1155,6 +1155,13 @@ msgid "" " affects array types." msgstr ""
+#: src/code/type.lisp +msgid "" +"The minimum length of either list argument for subsetp where a\n" +" hashtable is used to speed up processing instead of using a basic\n" +" list implementation. This value was determined by experimentation." +msgstr "" + #: src/code/type.lisp msgid "Subtypep is illegal on this type:~% ~S" msgstr ""
===================================== tests/sets.lisp ===================================== @@ -280,4 +280,3 @@ '(3 4) :test 'eql :test-not 'equal))) ->>>>>>> Stashed changes
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/792673c330f128f0944c462...