Raymond Toy pushed to branch issue-240-subsetp-with-hash-table at cmucl / cmucl
Commits: e0d52154 by Raymond Toy at 2023-08-17T16:55:56-07:00 for subsetp, hash the shorter of the two lists.
Since subsetp is kind of symmetrical and duplicates don't matter, we can hash either list. So let's hash the shorter of the two lists to minimize the host of creating the hashtable.
- - - - -
1 changed file:
- src/code/list.lisp
Changes:
===================================== src/code/list.lisp ===================================== @@ -997,22 +997,55 @@ (when (and testp notp) (error "Test and test-not both supplied."))
- ;; SUBSETP is used early in TYPE-INIT where hash tables aren't - ;; available yet, so we can't use hashtables then. LISPINIT will - ;; take care to disable this for the kernel.core. SAVE will set - ;; this to true it's safe to use hash tables for SUBSETP. - (let ((hashtable (when *allow-hashtable-for-set-functions* - (list-to-hashtable list2 key test test-not)))) - (cond (hashtable - (dolist (item list1) - (unless (nth-value 1 (gethash (apply-key key item) hashtable)) - (return-from subsetp nil))) - t) - ((null hashtable) - (dolist (item list1) - (unless (with-set-keys (member (apply-key key item) list2)) - (return-from subsetp nil))) - T)))) + (flet ((lists-to-hashtable () + ;; Find the shorter list and return the length and the shorter list + (when test-not + (return-from lists-to-hashtable nil)) + (let ((hash-test (let ((test-fn (if (and (symbolp test) + (fboundp test)) + (fdefinition test) + test))) + (cond ((eql test-fn #'eq) 'eq) + ((eql test-fn #'eql) 'eql) + ((eql test-fn #'equal) 'equal) + ((eql test-fn #'equalp) 'equalp))))) + (unless hash-test + (return-from lists-to-hashtable nil)) + (multiple-value-bind (min-length shorter-list) + (do ((len 0 (1+ len)) + (lst1 list1 (cdr lst1)) + (lst2 list2 (cdr lst2))) + ((or (null lst1) (null lst2)) + (values len (if (null lst1) list1 list2)))) + (when (< min-length *min-list-length-for-hashtable*) + (return-from lists-to-hashtable nil)) + (let ((hashtable (make-hash-table :test hash-test :size min-length))) + (dolist (item shorter-list) + (setf (gethash (apply-key key item) hashtable) item)) + (values hashtable shorter-list)))))) + + ;; SUBSETP is used early in TYPE-INIT where hash tables aren't + ;; available yet, so we can't use hashtables then. LISPINIT will + ;; take care to disable this for the kernel.core. SAVE will set + ;; this to true it's safe to use hash tables for SUBSETP. + (multiple-value-bind (hashtable shorter-list) + (when *allow-hashtable-for-set-functions* + (lists-to-hashtable)) + (cond (hashtable + (cond ((eq shorter-list list1) + (dolist (item list2) + (unless (nth-value 1 (gethash (apply-key key item) hashtable)) + (return-from subsetp nil)))) + ((eq shorter-list list2) + (dolist (item list1) + (unless (nth-value 1 (gethash (apply-key key item) hashtable)) + (return-from subsetp nil))))) + t) + ((null hashtable) + (dolist (item list1) + (unless (with-set-keys (member (apply-key key item) list2)) + (return-from subsetp nil))) + T)))))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/e0d5215400dab3169ab663fe...