Raymond Toy pushed to branch issue-240-add-hashtable-for-destructive-set-ops at cmucl / cmucl
Commits:
0b497d8c by Raymond Toy at 2023-08-21T20:15:07+00:00
Address #240: Speed up subsetp with a hashtable
- - - - -
bfae1626 by Raymond Toy at 2023-08-21T20:15:21+00:00
Merge branch 'issue-240-subsetp-with-hash-table' into 'master'
Address #240: Speed up subsetp with a hashtable
Closes #240
See merge request cmucl/cmucl!164
- - - - -
84d77f51 by Raymond Toy at 2023-08-21T15:54:30-07:00
Merge branch 'master' into issue-240-add-hashtable-for-destructive-set-ops
- - - - -
4 changed files:
- src/code/list.lisp
- src/code/type.lisp
- src/i18n/locale/cmucl.pot
- tests/sets.lisp
Changes:
=====================================
src/code/list.lisp
=====================================
@@ -1022,14 +1022,69 @@
(rplacd splicex (cdr x)))
(setq splicex x)))))
+(declaim (start-block shorter-list-to-hashtable subsetp))
+
+(defun shorter-list-to-hashtable (list1 list2 key test test-not)
+ ;; Find the shorter list and return the length and the shorter list
+ (when test-not
+ (return-from shorter-list-to-hashtable nil))
+ (let ((hash-test (let ((test-fn (if (and (symbolp test)
+ (fboundp test))
+ (fdefinition test)
+ test)))
+ (cond ((eql test-fn #'eq) 'eq)
+ ((eql test-fn #'eql) 'eql)
+ ((eql test-fn #'equal) 'equal)
+ ((eql test-fn #'equalp) 'equalp)))))
+ (unless hash-test
+ (return-from shorter-list-to-hashtable nil))
+ (multiple-value-bind (min-length shorter-list)
+ (do ((len 0 (1+ len))
+ (lst1 list1 (cdr lst1))
+ (lst2 list2 (cdr lst2)))
+ ((or (null lst1) (null lst2))
+ (values len (if (null lst1) list1 list2))))
+ (when (< min-length kernel::*min-list-length-for-subsetp-hashtable*)
+ (return-from shorter-list-to-hashtable nil))
+ (let ((hashtable (make-hash-table :test hash-test :size min-length)))
+ (dolist (item shorter-list)
+ (setf (gethash (apply-key key item) hashtable) item))
+ (values hashtable shorter-list)))))
+
(defun subsetp (list1 list2 &key key (test #'eql testp) (test-not nil notp))
"Returns T if every element in list1 is also in list2."
(declare (inline member))
- (dolist (elt list1)
- (unless (with-set-keys (member (apply-key key elt) list2))
- (return-from subsetp nil)))
- T)
+ (when (and testp notp)
+ (error "Test and test-not both supplied."))
+
+ ;; SUBSETP is used early in TYPE-INIT where hash tables aren't
+ ;; available yet, so we can't use hashtables then. LISPINIT will
+ ;; take care to disable this for the kernel.core. SAVE will set
+ ;; this to true when it's safe to use hash tables for SUBSETP.
+ (multiple-value-bind (hashtable shorter-list)
+ (when t
+ (shorter-list-to-hashtable list1 list2 key test test-not))
+ (cond (hashtable
+ (cond ((eq shorter-list list1)
+ ;; Remove any item from list2 from the hashtable containing list1.
+ (dolist (item list2)
+ (remhash (apply-key key item) hashtable))
+ ;; If the hash table is now empty, then every
+ ;; element in list1 appeared in list2, so list1 is a
+ ;; subset of list2.
+ (zerop (hash-table-count hashtable)))
+ ((eq shorter-list list2)
+ (dolist (item list1)
+ (unless (nth-value 1 (gethash (apply-key key item) hashtable))
+ (return-from subsetp nil)))
+ t)))
+ (t
+ (dolist (item list1)
+ (unless (with-set-keys (member (apply-key key item) list2))
+ (return-from subsetp nil)))
+ T))))
+(declaim (end-block))
;;; Functions that operate on association lists
=====================================
src/code/type.lisp
=====================================
@@ -377,6 +377,14 @@
(cold-load-init (setq *use-implementation-types* t))
(declaim (type boolean *use-implementation-types*))
+(defvar *min-list-length-for-subsetp-hashtable* 150
+ "The minimum length of either list argument for subsetp where a
+ hashtable is used to speed up processing instead of using a basic
+ list implementation. This value was determined by experimentation.")
+
+(cold-load-init (setq *min-list-length-for-subsetp-hashtable* 150))
+(declaim (type fixnum *min-list-length-for-subsetp-hashtable*))
+
;;; DELEGATE-COMPLEX-{SUBTYPEP-ARG2,INTERSECTION} -- Interface
;;;
;;; These functions are used as method for types which need a complex
=====================================
src/i18n/locale/cmucl.pot
=====================================
@@ -1155,6 +1155,13 @@ msgid ""
" affects array types."
msgstr ""
+#: src/code/type.lisp
+msgid ""
+"The minimum length of either list argument for subsetp where a\n"
+" hashtable is used to speed up processing instead of using a basic\n"
+" list implementation. This value was determined by experimentation."
+msgstr ""
+
#: src/code/type.lisp
msgid "Subtypep is illegal on this type:~% ~S"
msgstr ""
=====================================
tests/sets.lisp
=====================================
@@ -280,4 +280,3 @@
'(3 4)
:test 'eql
:test-not 'equal)))
->>>>>>> Stashed changes
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/792673c330f128f0944c46…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/792673c330f128f0944c46…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-240-clean-up-hashtable-impl at cmucl / cmucl
Commits:
ce2eadfc by Raymond Toy at 2023-08-21T13:30:44-07:00
Undo inadvertent indentation change.
- - - - -
1 changed file:
- src/code/list.lisp
Changes:
=====================================
src/code/list.lisp
=====================================
@@ -828,7 +828,7 @@
(defun intersection (list1 list2 &key key
- (test #'eql testp) (test-not nil notp))
+ (test #'eql testp) (test-not nil notp))
"Returns the intersection of list1 and list2."
(declare (inline member))
(if (and testp notp)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/ce2eadfcb1f890cd1c96b6a…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/ce2eadfcb1f890cd1c96b6a…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
0b497d8c by Raymond Toy at 2023-08-21T20:15:07+00:00
Address #240: Speed up subsetp with a hashtable
- - - - -
bfae1626 by Raymond Toy at 2023-08-21T20:15:21+00:00
Merge branch 'issue-240-subsetp-with-hash-table' into 'master'
Address #240: Speed up subsetp with a hashtable
Closes #240
See merge request cmucl/cmucl!164
- - - - -
4 changed files:
- src/code/list.lisp
- src/code/type.lisp
- src/i18n/locale/cmucl.pot
- tests/sets.lisp
Changes:
=====================================
src/code/list.lisp
=====================================
@@ -989,14 +989,69 @@
(rplacd splicex (cdr x)))
(setq splicex x)))))
+(declaim (start-block shorter-list-to-hashtable subsetp))
+
+(defun shorter-list-to-hashtable (list1 list2 key test test-not)
+ ;; Find the shorter list and return the length and the shorter list
+ (when test-not
+ (return-from shorter-list-to-hashtable nil))
+ (let ((hash-test (let ((test-fn (if (and (symbolp test)
+ (fboundp test))
+ (fdefinition test)
+ test)))
+ (cond ((eql test-fn #'eq) 'eq)
+ ((eql test-fn #'eql) 'eql)
+ ((eql test-fn #'equal) 'equal)
+ ((eql test-fn #'equalp) 'equalp)))))
+ (unless hash-test
+ (return-from shorter-list-to-hashtable nil))
+ (multiple-value-bind (min-length shorter-list)
+ (do ((len 0 (1+ len))
+ (lst1 list1 (cdr lst1))
+ (lst2 list2 (cdr lst2)))
+ ((or (null lst1) (null lst2))
+ (values len (if (null lst1) list1 list2))))
+ (when (< min-length kernel::*min-list-length-for-subsetp-hashtable*)
+ (return-from shorter-list-to-hashtable nil))
+ (let ((hashtable (make-hash-table :test hash-test :size min-length)))
+ (dolist (item shorter-list)
+ (setf (gethash (apply-key key item) hashtable) item))
+ (values hashtable shorter-list)))))
+
(defun subsetp (list1 list2 &key key (test #'eql testp) (test-not nil notp))
"Returns T if every element in list1 is also in list2."
(declare (inline member))
- (dolist (elt list1)
- (unless (with-set-keys (member (apply-key key elt) list2))
- (return-from subsetp nil)))
- T)
+ (when (and testp notp)
+ (error "Test and test-not both supplied."))
+
+ ;; SUBSETP is used early in TYPE-INIT where hash tables aren't
+ ;; available yet, so we can't use hashtables then. LISPINIT will
+ ;; take care to disable this for the kernel.core. SAVE will set
+ ;; this to true when it's safe to use hash tables for SUBSETP.
+ (multiple-value-bind (hashtable shorter-list)
+ (when t
+ (shorter-list-to-hashtable list1 list2 key test test-not))
+ (cond (hashtable
+ (cond ((eq shorter-list list1)
+ ;; Remove any item from list2 from the hashtable containing list1.
+ (dolist (item list2)
+ (remhash (apply-key key item) hashtable))
+ ;; If the hash table is now empty, then every
+ ;; element in list1 appeared in list2, so list1 is a
+ ;; subset of list2.
+ (zerop (hash-table-count hashtable)))
+ ((eq shorter-list list2)
+ (dolist (item list1)
+ (unless (nth-value 1 (gethash (apply-key key item) hashtable))
+ (return-from subsetp nil)))
+ t)))
+ (t
+ (dolist (item list1)
+ (unless (with-set-keys (member (apply-key key item) list2))
+ (return-from subsetp nil)))
+ T))))
+(declaim (end-block))
;;; Functions that operate on association lists
=====================================
src/code/type.lisp
=====================================
@@ -377,6 +377,14 @@
(cold-load-init (setq *use-implementation-types* t))
(declaim (type boolean *use-implementation-types*))
+(defvar *min-list-length-for-subsetp-hashtable* 150
+ "The minimum length of either list argument for subsetp where a
+ hashtable is used to speed up processing instead of using a basic
+ list implementation. This value was determined by experimentation.")
+
+(cold-load-init (setq *min-list-length-for-subsetp-hashtable* 150))
+(declaim (type fixnum *min-list-length-for-subsetp-hashtable*))
+
;;; DELEGATE-COMPLEX-{SUBTYPEP-ARG2,INTERSECTION} -- Interface
;;;
;;; These functions are used as method for types which need a complex
=====================================
src/i18n/locale/cmucl.pot
=====================================
@@ -1155,6 +1155,13 @@ msgid ""
" affects array types."
msgstr ""
+#: src/code/type.lisp
+msgid ""
+"The minimum length of either list argument for subsetp where a\n"
+" hashtable is used to speed up processing instead of using a basic\n"
+" list implementation. This value was determined by experimentation."
+msgstr ""
+
#: src/code/type.lisp
msgid "Subtypep is illegal on this type:~% ~S"
msgstr ""
=====================================
tests/sets.lisp
=====================================
@@ -172,3 +172,54 @@
'(3 4)
:test 'eql
:test-not 'eql)))
+
+
+(define-test subsetp.hash-eq
+ (:tag :issues)
+ (let ((lisp::*min-list-length-for-hashtable* 2))
+ (assert-true
+ (subsetp '(a b c a) '(a a d d c b) :test 'eq))
+ (assert-true
+ (subsetp '(a b c a b c a b c) '(a a d d c b) :test 'eq))
+ (assert-false
+ (subsetp '(a b c a z) '(a a d d c b) :test 'eq))
+ (assert-false
+ (subsetp '(a b c a b cz) '(a a d d c b) :test 'eq))))
+
+(define-test subsetp.hash-eql
+ (:tag :issues)
+ (let ((lisp::*min-list-length-for-hashtable* 2))
+ (assert-true
+ (subsetp '(a b c a) '(a a d d c b) :test 'eql))
+ (assert-false
+ (subsetp '(a b c a z) '(a a d d c b) :test 'eql))))
+
+(define-test subsetp.hash-equal
+ (:tag :issues)
+ (let ((lisp::*min-list-length-for-hashtable* 2))
+ (assert-true
+ (subsetp '("a" "b" "c" "a") '("a" "a" "d" "d" "c" "b") :test 'equal))
+ (assert-false
+ (subsetp '("a" "b" "c" "a" "z") '("a" "a" "d" "d" "c" "b") :test 'equal))))
+
+(define-test subsetp.hash-equalp
+ (:tag :issues)
+ (let ((lisp::*min-list-length-for-hashtable* 2))
+ (assert-true
+ (subsetp '("a" "b" "C" "A") '("a" "a" "d" "d" "c" "b") :test 'equalp))
+ (assert-false
+ (subsetp '("a" "b" "C" "A" "z") '("a" "a" "d" "d" "c" "b") :test 'equalp))))
+
+(define-test subsetp.hash-eql-with-key
+ (:tag :issues)
+ (assert-true (subsetp '((1 "a") (2 "b") (3 "c"))
+ '((3 "c") (3 "c") (2 "b") (1 "a"))
+ :test 'eql
+ :key #'first)))
+
+(define-test subsetp.test-and-test-not
+ (assert-error 'simple-error
+ (subsetp '(1 2)
+ '(3 4)
+ :test 'eql
+ :test-not 'equal)))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/eab9b8765cc37e24da1e97…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/eab9b8765cc37e24da1e97…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
f9ccc188 by Raymond Toy at 2023-08-21T18:03:25+00:00
Fix #252: Add script to run ansi-tests
- - - - -
eab9b876 by Raymond Toy at 2023-08-21T18:04:16+00:00
Merge branch 'issue-252-script-to-run-ansi-tests' into 'master'
Fix #252: Add script to run ansi-tests
Closes #252
See merge request cmucl/cmucl!165
- - - - -
3 changed files:
- .gitlab-ci.yml
- + bin/run-ansi-tests.sh
- bin/run-tests.sh → bin/run-unit-tests.sh
Changes:
=====================================
.gitlab-ci.yml
=====================================
@@ -62,7 +62,7 @@ linux:test:
- job: linux:build
artifacts: true
script:
- - bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
+ - bin/run-unit-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
linux:ansi-test:
stage: ansi-test
@@ -75,14 +75,8 @@ linux:ansi-test:
# Needs artifacts from build (dist/)
- job: linux:build
artifacts: true
- before_script:
- - git clone https://gitlab.common-lisp.net/cmucl/ansi-test.git
- - (cd ansi-test; git checkout cmucl-expected-failures)
script:
- - cd ansi-test
- - make LISP="../dist/bin/lisp -batch -noinit -nositeinit"
- # There should be no unexpected successes or failures; check these separately.
- - grep -a 'No unexpected successes' test.out && grep -a 'No unexpected failures' test.out
+ - bin/run-ansi-tests.sh -l dist/bin/lisp
linux:benchmark:
stage: benchmark
@@ -149,7 +143,7 @@ osx:test:
- job: osx:build
artifacts: true
script:
- - bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
+ - bin/run-unit-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
osx:ansi-test:
stage: ansi-test
@@ -162,14 +156,8 @@ osx:ansi-test:
# Needs artifacts from build (dist/)
- job: osx:build
artifacts: true
- before_script:
- - /opt/local/bin/git clone https://gitlab.common-lisp.net/cmucl/ansi-test.git
- - (cd ansi-test; /opt/local/bin/git checkout cmucl-expected-failures)
script:
- - cd ansi-test
- - make LISP="../dist/bin/lisp -batch -noinit -nositeinit"
- # There should be no unexpected successes or failures; check these separately.
- - grep -a 'No unexpected successes' test.out && grep -a 'No unexpected failures' test.out
+ - bin/run-ansi-tests.sh -l dist/bin/lisp
osx:benchmark:
stage: benchmark
=====================================
bin/run-ansi-tests.sh
=====================================
@@ -0,0 +1,48 @@
+#! /bin/bash
+
+# Run the ansi-tests.
+#
+# We need to check ou ansi-tests if we haven't already. We expect
+# this to be run from the root of the cmucl git tree. We will check
+# it out one level up from where we are.
+
+usage() {
+ echo "run-ansi-tests.sh [?h] [-l lisp]"
+ echo " -l lisp Lisp to use for the tests; defaults to lisp"
+ echo " -h|? This help message"
+ echo ""
+ echo "Run the ansi-tests"
+ echo ""
+ echo "If ../ansi-test does not exist a clone is checked out there."
+ echo "Then the ansi-test is run in the clone using the given lisp."
+ exit 0;
+}
+
+LISP=lisp
+while getopts "h?l:" arg
+do
+ case $arg in
+ l) LISP="$PWD/$OPTARG" ;;
+ \?) usage ;;
+ h) usage ;;
+ esac
+done
+
+# Shift out the options
+shift $[$OPTIND - 1]
+
+set -x
+if [ -d ../ansi-test ]; then
+ # We already have clone; make sure it's clean by stashing any changes.
+ (cd ../ansi-test; git stash)
+else
+ (cd ../; git clone https://gitlab.common-lisp.net/cmucl/ansi-test.git)
+fi
+
+cd ../ansi-test
+git checkout cmucl-expected-failures
+
+make LISP="$LISP batch -noinit -nositeinit"
+# There should be no unexpected successes or failures; check these separately
+grep -a 'No unexpected successes' test.out && grep -a 'No unexpected failures' test.out
+
=====================================
bin/run-tests.sh → bin/run-unit-tests.sh
=====================================
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/5b3e11f9235825ec368793…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/5b3e11f9235825ec368793…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-240-subsetp-with-hash-table at cmucl / cmucl
Commits:
e4f4082e by Raymond Toy at 2023-08-21T08:18:12-07:00
Update POT file for new comment
- - - - -
1 changed file:
- src/i18n/locale/cmucl.pot
Changes:
=====================================
src/i18n/locale/cmucl.pot
=====================================
@@ -1155,6 +1155,13 @@ msgid ""
" affects array types."
msgstr ""
+#: src/code/type.lisp
+msgid ""
+"The minimum length of either list argument for subsetp where a\n"
+" hashtable is used to speed up processing instead of using a basic\n"
+" list implementation. This value was determined by experimentation."
+msgstr ""
+
#: src/code/type.lisp
msgid "Subtypep is illegal on this type:~% ~S"
msgstr ""
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/e4f4082e4b015b26b1cc556…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/e4f4082e4b015b26b1cc556…
You're receiving this email because of your account on gitlab.common-lisp.net.