Raymond Toy pushed to branch master at cmucl / cmucl
Commits: 87e54e03 by Raymond Toy at 2023-08-25T14:15:52+00:00 Address #240: Use hashtable for nset-diff, nunion, nintersection
- - - - - 4542d55e by Raymond Toy at 2023-08-25T14:16:04+00:00 Merge branch 'issue-240-add-hashtable-for-destructive-set-ops' into 'master'
Address #240: Use hashtable for nset-diff, nunion, nintersection
See merge request cmucl/cmucl!167 - - - - -
2 changed files:
- src/code/list.lisp - tests/sets.lisp
Changes:
===================================== src/code/list.lisp ===================================== @@ -749,7 +749,9 @@ (defparameter *min-list-length-for-hashtable* 15)
-(declaim (start-block list-to-hashtable union intersection set-difference)) +(declaim (start-block list-to-hashtable + union intersection set-difference + nunion nintersection nset-difference))
;; Main code to process a set function. INIT-RES initializes the ;; value of RES, which holds the result of the set function. @@ -836,8 +838,6 @@ (process-set nil t))
-(declaim (end-block)) - ;;; Destination and source are setf-able and many-evaluable. Sets the source ;;; to the cdr, and "conses" the 1st elt of source to destination. ;;; @@ -847,19 +847,42 @@ (cdr temp) ,destination ,destination temp)))
+;;; Main body for destructive set operations. INIT-RES initializes +;;; the result list. INVERT-P is T if the result of the TEST-FORM +;;; should be inverted. TEST-FORM is the form used for determining +;;; how to update the result. +(defmacro nprocess-set-body (init-res invert-p test-form) + `(let ((res ,init-res) + (list1 list1)) + (do () + ((endp list1)) + (if ,(if invert-p + `(not ,test-form) + test-form) + (steve-splice list1 res) + (setq list1 (cdr list1)))) + res)) + +;; Implementation of the destructive set operations. INIT-RES +;; initializes the value of the result list. INVERT-P indicates +;; whether to invert the test-form used to determine how the result +;; should be updated. +(defmacro nprocess-set (init-res invert-p) + `(let ((hashtable (list-to-hashtable list2 key test test-not))) + (if hashtable + (nprocess-set-body ,init-res ,invert-p + (nth-value 1 (gethash (apply-key key (car list1)) hashtable))) + (nprocess-set-body ,init-res ,invert-p + (with-set-keys (member (apply-key key (car list1)) list2)))))) + + (defun nunion (list1 list2 &key key (test #'eql testp) (test-not nil notp)) "Destructively returns the union list1 and list2." (declare (inline member)) (if (and testp notp) (error "Test and test-not both supplied.")) - (let ((res list2) - (list1 list1)) - (do () - ((endp list1)) - (if (not (with-set-keys (member (apply-key key (car list1)) list2))) - (steve-splice list1 res) - (setf list1 (cdr list1)))) - res)) + + (nprocess-set list2 t))
(defun nintersection (list1 list2 &key key (test #'eql testp) (test-not nil notp)) @@ -867,13 +890,8 @@ (declare (inline member)) (if (and testp notp) (error "Test and test-not both supplied.")) - (let ((res nil) - (list1 list1)) - (do () ((endp list1)) - (if (with-set-keys (member (apply-key key (car list1)) list2)) - (steve-splice list1 res) - (setq list1 (Cdr list1)))) - res)) + + (nprocess-set nil nil))
(defun nset-difference (list1 list2 &key key (test #'eql testp) (test-not nil notp)) @@ -881,14 +899,10 @@ (declare (inline member)) (if (and testp notp) (error "Test and test-not both supplied.")) - (let ((res nil) - (list1 list1)) - (do () ((endp list1)) - (if (not (with-set-keys (member (apply-key key (car list1)) list2))) - (steve-splice list1 res) - (setq list1 (cdr list1)))) - res))
+ (nprocess-set nil t)) + +(declaim (end-block))
(defun set-exclusive-or (list1 list2 &key key (test #'eql testp) (test-not nil notp))
===================================== tests/sets.lisp ===================================== @@ -88,8 +88,37 @@ :test 'eql :test-not 'eql)))
-
+(define-test nset-diff.1 + (:tag :issues) + ;; From CLHS + (flet + ((test1 (min-length-limit) + (let ((lisp::*min-list-length-for-hashtable* min-length-limit) + (lst1 (list "A" "b" "C" "d")) + (lst2 (list "a" "B" "C" "d"))) + (assert-equal '("b" "A") + (nset-difference lst1 lst2 :test 'equal)) + ;; This isn't specified by the CLHS, but it is what we do. + (assert-equal '("A") lst1)))) + (test1 100) + (test1 1))) + +(define-test nset-diff.key + (:tag :issues) + (flet + ((test (min-length-limit) + ;; From CLHS + (let ((lisp::*min-list-length-for-hashtable* min-length-limit) + (lst1 (list '("a" . "b") '("c" . "d") '("e" . "f"))) + (lst2 (list '("c" . "a") '("e" . "b") '("d" . "a")))) + (assert-equal '(("e" . "f") ("c" . "d")) + (nset-difference lst1 lst2 :test 'equal :key #'cdr)) + ;; This isn't specified by the CLHS, but it is what we do. + (assert-equal '(("a" . "b") ("c" . "d")) lst1)))) + (test 100) + (test 1))) + (define-test union.hash-eql (:tag :issues) ;; For union to use hashtables by making the threshold @@ -173,6 +202,33 @@ :test 'eql :test-not 'eql)))
+(define-test nunion.1 + (:tag :issues) + (flet + ((test (min-list-length) + (let ((lisp::*min-list-length-for-hashtable* min-list-length) + (lst1 (list 1 2 '(1 2) "a" "b")) + (lst2 (list 2 3 '(2 3) "B" "C"))) + (assert-equal '("b" "a" (1 2) 1 2 3 (2 3) "B" "C") + (nunion lst1 lst2)) + (assert-equal '(1 2 3 (2 3) "B" "C") + lst1)))) + (test 100) + (test 1))) + +(define-test nintersection.1 + (:tag :issues) + (flet + ((test (min-list-length) + (let ((lisp::*min-list-length-for-hashtable* min-list-length) + (lst1 (list 1 1 2 3 4 'a 'b 'c "A" "B" "C" "d")) + (lst2 (list 1 4 5 'b 'c 'd "a" "B" "c" "D"))) + (assert-equal '(c b 4 1 1) + (nintersection lst1 lst2)) + (assert-equal '(1) lst1)))) + (test 100) + (test 1))) +
(define-test subsetp.hash-eq (:tag :issues)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/39817da21c213415dd494c3...