Author: sburson
Date: Sun Jun 10 21:34:38 2007
New Revision: 14
Modified:
trunk/Code/fset.lisp
trunk/Code/wb-trees.lisp
Log:
Added historically-related set/bag/map optimization.
This optimization applies to `union', `intersection', `set-difference',
`set-difference-2', `bag-difference', and `map-merge'; and `compare' on
sets, bags, and maps. (Of these, the difference operations are probably the
ones on which it is most useful.) It very cheaply detects cases where the
two operands share some of their subtrees, and takes the appropriate
shortcut. (For example, the set-difference of a subtree and itself is the
empty set.) The two operands are likely to share some subtrees if they are
historically related; e.g., if one is the result of performing a small
number of `with' and/or `less' operations on the other, or if both of them
are related in this way to a third collection. In such cases, these
algorithms can now run in log time rather than linear time, making this a
potentially quite significant optimization.
Modified: trunk/Code/fset.lisp
==============================================================================
--- trunk/Code/fset.lisp (original)
+++ trunk/Code/fset.lisp Sun Jun 10 21:34:38 2007
@@ -204,12 +204,12 @@
(defgeneric fold (fn collection &optional initial-value)
(:documentation
- "Iterates over `collection', maintaining a state S; on each iteration,
-`fn' is called on S and the next member of `collection', and the result
-is used as the new value of S; finally, returns S. The first iteration is
-special: if `initial-value' is supplied, it is used as the initial S; otherwise,
-the first member of `collection' is used as the initial S, and `fn' is not
-called on this iteration."))
+ "Iterates over `collection', maintaining a state S; on each iteration, `fn'
+is called on S and the next member of `collection', and the result is used as
+the new value of S; finally, returns S. The first iteration is special: if
+`initial-value' is supplied, it is used as the initial S; otherwise, the first
+member of `collection' is used as the initial S, and `fn' is not called on this
+iteration."))
(defgeneric domain (map)
(:documentation
@@ -222,14 +222,15 @@
(defgeneric map-merge (map1 map2 &optional val-fn)
(:documentation
- "Returns a map containing all the keys of `map1' and `map2', where
-the value for each key contained in only one map is the value from that
-map, and the value for each key contained in both maps is the result of
-calling `val-fn' on the key, the value from `map1', and the value from
-`map2'. `val-fn' defaults to simply returning its third argument, so
-the entries in `map2' simply shadow those in `map1'. The default for the
-new map is computed by calling `val-fn' on the symbol `fset:map-default'
-and the defaults for the two maps."))
+ "Returns a map containing all the keys of `map1' and `map2', where the
+value for each key contained in only one map is the value from that map, and
+the value for each key contained in both maps is the result of calling `val-fn'
+on the key, the value from `map1', and the value from `map2'. `val-fn'
+defaults to simply returning its third argument, so the entries in `map2'
+simply shadow those in `map1'. Also, `val-fn' must have the property that if
+its second and third arguments are equal, its result is equal to them. The
+default for the new map is computed by calling `val-fn' on the symbol
+`fset:map-default' and the defaults for the two maps."))
(defgeneric restrict (map set)
(:documentation
Modified: trunk/Code/wb-trees.lisp
==============================================================================
--- trunk/Code/wb-trees.lisp (original)
+++ trunk/Code/wb-trees.lisp Sun Jun 10 21:34:38 2007
@@ -461,7 +461,13 @@
and `tree2' are in this range."
(declare (optimize (speed 3) (safety 0))
(type WB-Set-Tree tree1 tree2))
- (cond ((null tree2)
+ (cond ;; If the sets are historically related -- one was produced by a sufficiently
+ ;; small number of `with' and `less' operations on the other, or they are both
+ ;; related in this way to a third set -- then we might get lucky and find
+ ;; ourselves with the same subtree on both sides. This can reduce this
+ ;; linear-time algorithm to log-time.
+ ((eq tree1 tree2) (WB-Set-Tree-Split tree1 lo hi))
+ ((null tree2)
(WB-Set-Tree-Split tree1 lo hi))
((null tree1)
(WB-Set-Tree-Split tree2 lo hi))
@@ -502,7 +508,9 @@
of `tree1' and `tree2' are in this range."
(declare (optimize (speed 3) (safety 0))
(type WB-Set-Tree tree1 tree2))
- (cond ((or (null tree1) (null tree2))
+ (cond ((eq tree1 tree2) ; historically-related-set optimization
+ (WB-Set-Tree-Split tree1 lo hi))
+ ((or (null tree1) (null tree2))
nil)
((and (simple-vector-p tree1) (simple-vector-p tree2))
(Vector-Set-Intersect tree1 tree2 lo hi))
@@ -539,7 +547,8 @@
of `tree1' and `tree2' are in this range."
(declare (optimize (speed 3) (safety 0))
(type WB-Set-Tree tree1 tree2))
- (cond ((null tree1) nil)
+ (cond ((eq tree1 tree2) nil) ; historically-related-set optimization
+ ((null tree1) nil)
((null tree2)
(WB-Set-Tree-Split tree1 lo hi))
((and (simple-vector-p tree1) (simple-vector-p tree2))
@@ -594,7 +603,8 @@
this range."
(declare (optimize (speed 3) (safety 0))
(type WB-Set-Tree tree1 tree2))
- (cond ((or (null tree1) (null tree2))
+ (cond ((eq tree1 tree2) (values nil nil)) ; historically-related-set optimization
+ ((or (null tree1) (null tree2))
(values (WB-Set-Tree-Split tree1 lo hi)
(WB-Set-Tree-Split tree2 lo hi)))
((and (simple-vector-p tree1) (simple-vector-p tree2))
@@ -663,13 +673,15 @@
;; This is similar to the other hedge algorithms, but there is a key difference:
;; it is concerned not with the values of nodes but with their rank, that is,
;; the number of values to their left. The `base' parameters specify, for
- ;; each tree, the number of values above and to the left of the tree.
+ ;; each tree, the number of values to the left of the tree.
;; Another subtlety: we can return as soon as we get a comparison result of
;; ':less or ':greater, but ':unequal has to wait until the end.
(declare (optimize (speed 3) (safety 0))
(type WB-Set-Tree tree1 tree2)
(type fixnum base1 base2 lo hi))
- (cond ((= lo hi) ':equal)
+ (cond ((and (eq tree1 tree2) (= base1 base2)) ; historically-related-set optimization
+ ':equal)
+ ((= lo hi) ':equal)
((and (simple-vector-p tree1) (simple-vector-p tree2))
(let ((unequal? nil))
(or (gmap :or #'(lambda (val1 val2)
@@ -709,8 +721,8 @@
(WB-Set-Tree-Rank-Trim (WB-Set-Tree-Node-Right tree1)
new-lo new-lo hi))
(tree2a base2a (WB-Set-Tree-Rank-Trim tree2 base2 new-lo hi))
- ((right-comp (WB-Set-Tree-Compare-Rng right1a base1a tree2a base2a
- new-lo hi))))))
+ ((right-comp (WB-Set-Tree-Compare-Rng
+ right1a base1a tree2a base2a new-lo hi))))))
(if (not (eq right-comp ':equal))
right-comp
(if (eq left-comp ':unequal) ':unequal val-comp))))))))))
@@ -730,7 +742,8 @@
(values tree base)
(WB-Set-Tree-Rank-Trim (WB-Set-Tree-Node-Left tree) base lo hi))
(WB-Set-Tree-Rank-Trim (WB-Set-Tree-Node-Right tree)
- (+ node-rank (Set-Value-Size (WB-Set-Tree-Node-Value tree)))
+ (+ node-rank
+ (Set-Value-Size (WB-Set-Tree-Node-Value tree)))
lo hi)))))
(defun WB-Set-Tree-Rank-Element (tree rank)
@@ -768,7 +781,8 @@
(defun WB-Set-Tree-Subset?-Rng (tree1 tree2 lo hi)
(declare (optimize (speed 3) (safety 0))
(type WB-Set-Tree tree1 tree2))
- (cond ((null tree1) t)
+ (cond ((eq tree1 tree2) t) ; historically-related-set optimization
+ ((null tree1) t)
((and (simple-vector-p tree1) (or (null tree2) (simple-vector-p tree2)))
(Vector-Set-Subset? tree1 tree2 lo hi))
((simple-vector-p tree1)
@@ -897,7 +911,9 @@
(defun WB-Set-Tree-Trim (tree lo hi)
"Corresponds to Adams' `trim' and variants. Removes any tree nodes whose
-values are less than `lo' or greater than `hi'."
+values are less than `lo' or greater than `hi'. Note, this does _not_ guarantee
+that the result only contains values between `lo' and `hi'; use `-Split' for
+that. This, however, doesn't cons."
(declare (optimize (speed 3) (safety 0))
(type WB-Set-Tree tree))
(cond ((null tree) nil)
@@ -1841,7 +1857,9 @@
(defun WB-Bag-Tree-Union-Rng (tree1 tree2 lo hi)
(declare (optimize (speed 3) (safety 0))
(type WB-Bag-Tree tree1 tree2))
- (cond ((null tree2)
+ (cond ((eq tree1 tree2) ; historically-related-bag optimization
+ (WB-Bag-Tree-Split tree1 lo hi))
+ ((null tree2)
(WB-Bag-Tree-Split tree1 lo hi))
((null tree1)
(WB-Bag-Tree-Split tree2 lo hi))
@@ -1911,7 +1929,9 @@
of `tree1' and `tree2' are in this range."
(declare (optimize (speed 3) (safety 0))
(type WB-Bag-Tree tree1 tree2))
- (cond ((or (null tree1) (null tree2))
+ (cond ((eq tree1 tree2) ; historically-related-bag optimization
+ (WB-Bag-Tree-Split tree1 lo hi))
+ ((or (null tree1) (null tree2))
nil)
((and (consp tree1) (consp tree2))
(Vector-Pair-Bag-Intersect tree1 tree2 lo hi))
@@ -1986,7 +2006,8 @@
of `tree1' and `tree2' are in this range."
(declare (optimize (speed 3) (safety 0))
(type WB-Bag-Tree tree1 tree2))
- (cond ((null tree1) nil)
+ (cond ((eq tree1 tree2) nil) ; historically-related-bag optimization
+ ((null tree1) nil)
((null tree2)
(WB-Bag-Tree-Split tree1 lo hi))
((and (consp tree1) (consp tree2))
@@ -2041,7 +2062,9 @@
(defun WB-Bag-Tree-Compare-Rng (tree1 base1 tree2 base2 lo hi)
;; See notes at `WB-Set-Tree-Compare-Rng'.
- (cond ((= lo hi) ':equal)
+ (cond ((and (eq tree1 tree2) (= base1 base2)) ; historically-related-bag optimization
+ ':equal)
+ ((= lo hi) ':equal)
((and (consp tree1) (consp tree2))
(let ((unequal? nil))
(or (gmap :or #'(lambda (val1 count1 val2 count2)
@@ -3475,7 +3498,9 @@
(declare (optimize (speed 3) (safety 0))
(type function val-fn)
(type WB-Map-Tree tree1 tree2))
- (cond ((null tree2)
+ (cond ((eq tree1 tree2) ; historically-related-map optimization
+ (WB-Map-Tree-Split tree1 lo hi))
+ ((null tree2)
(WB-Map-Tree-Split tree1 lo hi))
((null tree1)
(WB-Map-Tree-Split tree2 lo hi))
@@ -3646,7 +3671,9 @@
(declare (optimize (speed 3) (safety 0))
(type WB-Map-Tree tree1 tree2)
(type fixnum base1 base2 lo hi))
- (cond ((= lo hi) ':equal)
+ (cond ((and (eq tree1 tree2) (= base1 base2)) ; historically-related-map optimization
+ ':equal)
+ ((= lo hi) ':equal)
((and (consp tree1) (consp tree2))
(let ((unequal? nil))
(or (gmap :or #'(lambda (key1 val1 key2 val2)