Raymond Toy pushed to branch issue-240-add-hashtable-for-destructive-set-ops at cmucl / cmucl
Commits:
8f262bac by Raymond Toy at 2023-08-22T13:01:02-07:00
Add destructive functions to start-block
We forgot to add `nunion`, `nintersection`, and `nset-difference` to
the `start-block` to make the available. Without this, they're not
globally defined.
- - - - -
1 changed file:
- src/code/list.lisp
Changes:
=====================================
src/code/list.lisp
=====================================
@@ -749,7 +749,9 @@
(defparameter *min-list-length-for-hashtable*
15)
-(declaim (start-block list-to-hashtable union intersection set-difference))
+(declaim (start-block list-to-hashtable
+ union intersection set-difference
+ nunion nintersection nset-difference))
;; Convert a list to a hashtable. The hashtable does not handle
;; duplicated values in the list. Returns the hashtable.
(defun list-to-hashtable (list key test test-not)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/8f262bac2129c7d8532c3df…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/8f262bac2129c7d8532c3df…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-240-clean-up-hashtable-impl at cmucl / cmucl
Commits:
0b497d8c by Raymond Toy at 2023-08-21T20:15:07+00:00
Address #240: Speed up subsetp with a hashtable
- - - - -
bfae1626 by Raymond Toy at 2023-08-21T20:15:21+00:00
Merge branch 'issue-240-subsetp-with-hash-table' into 'master'
Address #240: Speed up subsetp with a hashtable
Closes #240
See merge request cmucl/cmucl!164
- - - - -
afaeb420 by Raymond Toy at 2023-08-22T14:00:25+00:00
Fix #253: Block-compile list-to-hashtable and callers
- - - - -
a5b2c0f8 by Raymond Toy at 2023-08-22T14:02:24+00:00
Merge branch 'issue-253-block-compile-list-to-hashtable' into 'master'
Fix #253: Block-compile list-to-hashtable and callers
Closes #253
See merge request cmucl/cmucl!166
- - - - -
97b6cca7 by Raymond Toy at 2023-08-22T07:07:49-07:00
Merge branch 'master' into issue-240-clean-up-hashtable-impl
- - - - -
4 changed files:
- src/code/list.lisp
- src/code/type.lisp
- src/i18n/locale/cmucl.pot
- tests/sets.lisp
Changes:
=====================================
src/code/list.lisp
=====================================
@@ -749,6 +749,7 @@
(defparameter *min-list-length-for-hashtable*
15)
+(declaim (start-block list-to-hashtable union intersection set-difference))
;; Convert a list to a hashtable. The hashtable does not handle
;; duplicated values in the list. Returns the hashtable.
(defun list-to-hashtable (list key test test-not)
@@ -803,29 +804,6 @@
(process (with-set-keys (member (apply-key key item) list2)))))
res)))
-;;; Destination and source are setf-able and many-evaluable. Sets the source
-;;; to the cdr, and "conses" the 1st elt of source to destination.
-;;;
-(defmacro steve-splice (source destination)
- `(let ((temp ,source))
- (setf ,source (cdr ,source)
- (cdr temp) ,destination
- ,destination temp)))
-
-(defun nunion (list1 list2 &key key (test #'eql testp) (test-not nil notp))
- "Destructively returns the union list1 and list2."
- (declare (inline member))
- (if (and testp notp)
- (error "Test and test-not both supplied."))
- (let ((res list2)
- (list1 list1))
- (do ()
- ((endp list1))
- (if (not (with-set-keys (member (apply-key key (car list1)) list2)))
- (steve-splice list1 res)
- (setf list1 (cdr list1))))
- res))
-
(defun intersection (list1 list2 &key key
(test #'eql testp) (test-not nil notp))
@@ -847,20 +825,6 @@
(t
(process (with-set-keys (member (apply-key key item) list2))))))))
-(defun nintersection (list1 list2 &key key
- (test #'eql testp) (test-not nil notp))
- "Destructively returns the intersection of list1 and list2."
- (declare (inline member))
- (if (and testp notp)
- (error "Test and test-not both supplied."))
- (let ((res nil)
- (list1 list1))
- (do () ((endp list1))
- (if (with-set-keys (member (apply-key key (car list1)) list2))
- (steve-splice list1 res)
- (setq list1 (Cdr list1))))
- res))
-
(defun set-difference (list1 list2 &key key (test #'eql testp) (test-not nil notp))
"Returns the elements of list1 which are not in list2."
(declare (inline member))
@@ -888,6 +852,45 @@
(process (with-set-keys (member (apply-key key item) list2))))))))
+(declaim (end-block))
+
+;;; Destination and source are setf-able and many-evaluable. Sets the source
+;;; to the cdr, and "conses" the 1st elt of source to destination.
+;;;
+(defmacro steve-splice (source destination)
+ `(let ((temp ,source))
+ (setf ,source (cdr ,source)
+ (cdr temp) ,destination
+ ,destination temp)))
+
+(defun nunion (list1 list2 &key key (test #'eql testp) (test-not nil notp))
+ "Destructively returns the union list1 and list2."
+ (declare (inline member))
+ (if (and testp notp)
+ (error "Test and test-not both supplied."))
+ (let ((res list2)
+ (list1 list1))
+ (do ()
+ ((endp list1))
+ (if (not (with-set-keys (member (apply-key key (car list1)) list2)))
+ (steve-splice list1 res)
+ (setf list1 (cdr list1))))
+ res))
+
+(defun nintersection (list1 list2 &key key
+ (test #'eql testp) (test-not nil notp))
+ "Destructively returns the intersection of list1 and list2."
+ (declare (inline member))
+ (if (and testp notp)
+ (error "Test and test-not both supplied."))
+ (let ((res nil)
+ (list1 list1))
+ (do () ((endp list1))
+ (if (with-set-keys (member (apply-key key (car list1)) list2))
+ (steve-splice list1 res)
+ (setq list1 (Cdr list1))))
+ res))
+
(defun nset-difference (list1 list2 &key key
(test #'eql testp) (test-not nil notp))
"Destructively returns the elements of list1 which are not in list2."
@@ -989,14 +992,69 @@
(rplacd splicex (cdr x)))
(setq splicex x)))))
+(declaim (start-block shorter-list-to-hashtable subsetp))
+
+(defun shorter-list-to-hashtable (list1 list2 key test test-not)
+ ;; Find the shorter list and return the length and the shorter list
+ (when test-not
+ (return-from shorter-list-to-hashtable nil))
+ (let ((hash-test (let ((test-fn (if (and (symbolp test)
+ (fboundp test))
+ (fdefinition test)
+ test)))
+ (cond ((eql test-fn #'eq) 'eq)
+ ((eql test-fn #'eql) 'eql)
+ ((eql test-fn #'equal) 'equal)
+ ((eql test-fn #'equalp) 'equalp)))))
+ (unless hash-test
+ (return-from shorter-list-to-hashtable nil))
+ (multiple-value-bind (min-length shorter-list)
+ (do ((len 0 (1+ len))
+ (lst1 list1 (cdr lst1))
+ (lst2 list2 (cdr lst2)))
+ ((or (null lst1) (null lst2))
+ (values len (if (null lst1) list1 list2))))
+ (when (< min-length kernel::*min-list-length-for-subsetp-hashtable*)
+ (return-from shorter-list-to-hashtable nil))
+ (let ((hashtable (make-hash-table :test hash-test :size min-length)))
+ (dolist (item shorter-list)
+ (setf (gethash (apply-key key item) hashtable) item))
+ (values hashtable shorter-list)))))
+
(defun subsetp (list1 list2 &key key (test #'eql testp) (test-not nil notp))
"Returns T if every element in list1 is also in list2."
(declare (inline member))
- (dolist (elt list1)
- (unless (with-set-keys (member (apply-key key elt) list2))
- (return-from subsetp nil)))
- T)
-
+ (when (and testp notp)
+ (error "Test and test-not both supplied."))
+
+ ;; SUBSETP is used early in TYPE-INIT where hash tables aren't
+ ;; available yet, so we can't use hashtables then. LISPINIT will
+ ;; take care to disable this for the kernel.core. SAVE will set
+ ;; this to true when it's safe to use hash tables for SUBSETP.
+ (multiple-value-bind (hashtable shorter-list)
+ (when t
+ (shorter-list-to-hashtable list1 list2 key test test-not))
+ (cond (hashtable
+ (cond ((eq shorter-list list1)
+ ;; Remove any item from list2 from the hashtable containing list1.
+ (dolist (item list2)
+ (remhash (apply-key key item) hashtable))
+ ;; If the hash table is now empty, then every
+ ;; element in list1 appeared in list2, so list1 is a
+ ;; subset of list2.
+ (zerop (hash-table-count hashtable)))
+ ((eq shorter-list list2)
+ (dolist (item list1)
+ (unless (nth-value 1 (gethash (apply-key key item) hashtable))
+ (return-from subsetp nil)))
+ t)))
+ (t
+ (dolist (item list1)
+ (unless (with-set-keys (member (apply-key key item) list2))
+ (return-from subsetp nil)))
+ T))))
+
+(declaim (end-block))
;;; Functions that operate on association lists
=====================================
src/code/type.lisp
=====================================
@@ -377,6 +377,14 @@
(cold-load-init (setq *use-implementation-types* t))
(declaim (type boolean *use-implementation-types*))
+(defvar *min-list-length-for-subsetp-hashtable* 150
+ "The minimum length of either list argument for subsetp where a
+ hashtable is used to speed up processing instead of using a basic
+ list implementation. This value was determined by experimentation.")
+
+(cold-load-init (setq *min-list-length-for-subsetp-hashtable* 150))
+(declaim (type fixnum *min-list-length-for-subsetp-hashtable*))
+
;;; DELEGATE-COMPLEX-{SUBTYPEP-ARG2,INTERSECTION} -- Interface
;;;
;;; These functions are used as method for types which need a complex
=====================================
src/i18n/locale/cmucl.pot
=====================================
@@ -1155,6 +1155,13 @@ msgid ""
" affects array types."
msgstr ""
+#: src/code/type.lisp
+msgid ""
+"The minimum length of either list argument for subsetp where a\n"
+" hashtable is used to speed up processing instead of using a basic\n"
+" list implementation. This value was determined by experimentation."
+msgstr ""
+
#: src/code/type.lisp
msgid "Subtypep is illegal on this type:~% ~S"
msgstr ""
@@ -3209,19 +3216,19 @@ msgid "Test and test-not both supplied."
msgstr ""
#: src/code/list.lisp
-msgid "Destructively returns the union list1 and list2."
+msgid "Returns the intersection of list1 and list2."
msgstr ""
#: src/code/list.lisp
-msgid "Returns the intersection of list1 and list2."
+msgid "Returns the elements of list1 which are not in list2."
msgstr ""
#: src/code/list.lisp
-msgid "Destructively returns the intersection of list1 and list2."
+msgid "Destructively returns the union list1 and list2."
msgstr ""
#: src/code/list.lisp
-msgid "Returns the elements of list1 which are not in list2."
+msgid "Destructively returns the intersection of list1 and list2."
msgstr ""
#: src/code/list.lisp
=====================================
tests/sets.lisp
=====================================
@@ -172,3 +172,54 @@
'(3 4)
:test 'eql
:test-not 'eql)))
+
+
+(define-test subsetp.hash-eq
+ (:tag :issues)
+ (let ((lisp::*min-list-length-for-hashtable* 2))
+ (assert-true
+ (subsetp '(a b c a) '(a a d d c b) :test 'eq))
+ (assert-true
+ (subsetp '(a b c a b c a b c) '(a a d d c b) :test 'eq))
+ (assert-false
+ (subsetp '(a b c a z) '(a a d d c b) :test 'eq))
+ (assert-false
+ (subsetp '(a b c a b cz) '(a a d d c b) :test 'eq))))
+
+(define-test subsetp.hash-eql
+ (:tag :issues)
+ (let ((lisp::*min-list-length-for-hashtable* 2))
+ (assert-true
+ (subsetp '(a b c a) '(a a d d c b) :test 'eql))
+ (assert-false
+ (subsetp '(a b c a z) '(a a d d c b) :test 'eql))))
+
+(define-test subsetp.hash-equal
+ (:tag :issues)
+ (let ((lisp::*min-list-length-for-hashtable* 2))
+ (assert-true
+ (subsetp '("a" "b" "c" "a") '("a" "a" "d" "d" "c" "b") :test 'equal))
+ (assert-false
+ (subsetp '("a" "b" "c" "a" "z") '("a" "a" "d" "d" "c" "b") :test 'equal))))
+
+(define-test subsetp.hash-equalp
+ (:tag :issues)
+ (let ((lisp::*min-list-length-for-hashtable* 2))
+ (assert-true
+ (subsetp '("a" "b" "C" "A") '("a" "a" "d" "d" "c" "b") :test 'equalp))
+ (assert-false
+ (subsetp '("a" "b" "C" "A" "z") '("a" "a" "d" "d" "c" "b") :test 'equalp))))
+
+(define-test subsetp.hash-eql-with-key
+ (:tag :issues)
+ (assert-true (subsetp '((1 "a") (2 "b") (3 "c"))
+ '((3 "c") (3 "c") (2 "b") (1 "a"))
+ :test 'eql
+ :key #'first)))
+
+(define-test subsetp.test-and-test-not
+ (assert-error 'simple-error
+ (subsetp '(1 2)
+ '(3 4)
+ :test 'eql
+ :test-not 'equal)))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/ce2eadfcb1f890cd1c96b6…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/ce2eadfcb1f890cd1c96b6…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-240-add-hashtable-for-destructive-set-ops at cmucl / cmucl
Commits:
0b497d8c by Raymond Toy at 2023-08-21T20:15:07+00:00
Address #240: Speed up subsetp with a hashtable
- - - - -
bfae1626 by Raymond Toy at 2023-08-21T20:15:21+00:00
Merge branch 'issue-240-subsetp-with-hash-table' into 'master'
Address #240: Speed up subsetp with a hashtable
Closes #240
See merge request cmucl/cmucl!164
- - - - -
84d77f51 by Raymond Toy at 2023-08-21T15:54:30-07:00
Merge branch 'master' into issue-240-add-hashtable-for-destructive-set-ops
- - - - -
4 changed files:
- src/code/list.lisp
- src/code/type.lisp
- src/i18n/locale/cmucl.pot
- tests/sets.lisp
Changes:
=====================================
src/code/list.lisp
=====================================
@@ -1022,14 +1022,69 @@
(rplacd splicex (cdr x)))
(setq splicex x)))))
+(declaim (start-block shorter-list-to-hashtable subsetp))
+
+(defun shorter-list-to-hashtable (list1 list2 key test test-not)
+ ;; Find the shorter list and return the length and the shorter list
+ (when test-not
+ (return-from shorter-list-to-hashtable nil))
+ (let ((hash-test (let ((test-fn (if (and (symbolp test)
+ (fboundp test))
+ (fdefinition test)
+ test)))
+ (cond ((eql test-fn #'eq) 'eq)
+ ((eql test-fn #'eql) 'eql)
+ ((eql test-fn #'equal) 'equal)
+ ((eql test-fn #'equalp) 'equalp)))))
+ (unless hash-test
+ (return-from shorter-list-to-hashtable nil))
+ (multiple-value-bind (min-length shorter-list)
+ (do ((len 0 (1+ len))
+ (lst1 list1 (cdr lst1))
+ (lst2 list2 (cdr lst2)))
+ ((or (null lst1) (null lst2))
+ (values len (if (null lst1) list1 list2))))
+ (when (< min-length kernel::*min-list-length-for-subsetp-hashtable*)
+ (return-from shorter-list-to-hashtable nil))
+ (let ((hashtable (make-hash-table :test hash-test :size min-length)))
+ (dolist (item shorter-list)
+ (setf (gethash (apply-key key item) hashtable) item))
+ (values hashtable shorter-list)))))
+
(defun subsetp (list1 list2 &key key (test #'eql testp) (test-not nil notp))
"Returns T if every element in list1 is also in list2."
(declare (inline member))
- (dolist (elt list1)
- (unless (with-set-keys (member (apply-key key elt) list2))
- (return-from subsetp nil)))
- T)
+ (when (and testp notp)
+ (error "Test and test-not both supplied."))
+
+ ;; SUBSETP is used early in TYPE-INIT where hash tables aren't
+ ;; available yet, so we can't use hashtables then. LISPINIT will
+ ;; take care to disable this for the kernel.core. SAVE will set
+ ;; this to true when it's safe to use hash tables for SUBSETP.
+ (multiple-value-bind (hashtable shorter-list)
+ (when t
+ (shorter-list-to-hashtable list1 list2 key test test-not))
+ (cond (hashtable
+ (cond ((eq shorter-list list1)
+ ;; Remove any item from list2 from the hashtable containing list1.
+ (dolist (item list2)
+ (remhash (apply-key key item) hashtable))
+ ;; If the hash table is now empty, then every
+ ;; element in list1 appeared in list2, so list1 is a
+ ;; subset of list2.
+ (zerop (hash-table-count hashtable)))
+ ((eq shorter-list list2)
+ (dolist (item list1)
+ (unless (nth-value 1 (gethash (apply-key key item) hashtable))
+ (return-from subsetp nil)))
+ t)))
+ (t
+ (dolist (item list1)
+ (unless (with-set-keys (member (apply-key key item) list2))
+ (return-from subsetp nil)))
+ T))))
+(declaim (end-block))
;;; Functions that operate on association lists
=====================================
src/code/type.lisp
=====================================
@@ -377,6 +377,14 @@
(cold-load-init (setq *use-implementation-types* t))
(declaim (type boolean *use-implementation-types*))
+(defvar *min-list-length-for-subsetp-hashtable* 150
+ "The minimum length of either list argument for subsetp where a
+ hashtable is used to speed up processing instead of using a basic
+ list implementation. This value was determined by experimentation.")
+
+(cold-load-init (setq *min-list-length-for-subsetp-hashtable* 150))
+(declaim (type fixnum *min-list-length-for-subsetp-hashtable*))
+
;;; DELEGATE-COMPLEX-{SUBTYPEP-ARG2,INTERSECTION} -- Interface
;;;
;;; These functions are used as method for types which need a complex
=====================================
src/i18n/locale/cmucl.pot
=====================================
@@ -1155,6 +1155,13 @@ msgid ""
" affects array types."
msgstr ""
+#: src/code/type.lisp
+msgid ""
+"The minimum length of either list argument for subsetp where a\n"
+" hashtable is used to speed up processing instead of using a basic\n"
+" list implementation. This value was determined by experimentation."
+msgstr ""
+
#: src/code/type.lisp
msgid "Subtypep is illegal on this type:~% ~S"
msgstr ""
=====================================
tests/sets.lisp
=====================================
@@ -280,4 +280,3 @@
'(3 4)
:test 'eql
:test-not 'equal)))
->>>>>>> Stashed changes
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/792673c330f128f0944c46…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/792673c330f128f0944c46…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-240-clean-up-hashtable-impl at cmucl / cmucl
Commits:
ce2eadfc by Raymond Toy at 2023-08-21T13:30:44-07:00
Undo inadvertent indentation change.
- - - - -
1 changed file:
- src/code/list.lisp
Changes:
=====================================
src/code/list.lisp
=====================================
@@ -828,7 +828,7 @@
(defun intersection (list1 list2 &key key
- (test #'eql testp) (test-not nil notp))
+ (test #'eql testp) (test-not nil notp))
"Returns the intersection of list1 and list2."
(declare (inline member))
(if (and testp notp)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/ce2eadfcb1f890cd1c96b6a…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/ce2eadfcb1f890cd1c96b6a…
You're receiving this email because of your account on gitlab.common-lisp.net.