From 0a198ceab8afb11ff5ce1ee614d22bc80970c187 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Tue, 3 Jul 2007 11:45:24 +0100 Subject: [PATCH] aa-tree: Simple balanced binary tree. I've implemented Andersson trees, which are a particularly simple kind of balanced binary tree, with the usual performance guarantees for such things. --- aa-tree.lisp | 443 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mdw.asd | 1 + 2 files changed, 444 insertions(+) create mode 100644 aa-tree.lisp diff --git a/aa-tree.lisp b/aa-tree.lisp new file mode 100644 index 0000000..3c60672 --- /dev/null +++ b/aa-tree.lisp @@ -0,0 +1,443 @@ +;;; -*-lisp-*- +;;; +;;; $Id$ +;;; +;;; Andersson tree implementation +;;; +;;; (c) 2006 Straylight/Edgeware +;;; + +;;;----- Licensing notice --------------------------------------------------- +;;; +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software Foundation, +;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +;;;-------------------------------------------------------------------------- +;;; Package. + +(defpackage #:aa-tree + (:use #:common-lisp #:mdw.base) + (:export #:make-aa-tree #:aa-tree-p #:aa-tree-key< + #:getaa #:updateaa #:mapaa #:doaa #:aa-tree-iterator #:remaa)) +(in-package #:aa-tree) + +;;;-------------------------------------------------------------------------- +;;; The underlying implementation. + +(deftype stack-pointer () '(integer 0 255)) + +(defstruct (tree-node + (:conc-name node-) + (:type vector) + (:constructor make-tree-node + (key &optional data level left right))) + "Structure representing a node in an Andersson tree." + (left nil :type (or null tree-node)) + (right nil :type (or null tree-node)) + (level 0 :type stack-pointer) + key + data) + +(deftype tree-node () 'simple-vector) + +(defstruct (aa-tree + (:predicate treep) + (:constructor make-aa-tree + (key<-name + &aux + (key< (functionify key<-name)))) + (:conc-name tree-)) + "Structure representing an Andersson tree." + (root nil :type (or null tree-node)) + (stack (make-array 32) :type simple-vector) + (key< (slot-uninitialized) :read-only t :type (function (t t) t))) + +(declaim (inline skew split)) + +(defun skew (node) + "Implements the `skew' operation on a tree node, eliminating left-pointing + internal pointers by applying right-rotation. Returns the replacement + node." + (declare (type tree-node node)) + (let ((left (node-left node))) + (when (and left (= (node-level node) (node-level left))) + (shiftf (node-left node) (node-right left) node left)) + node)) + +(defun split (node) + "Implements the `split' operation on a tree node, eliminating overly-large + pseudo-nodes by applying left-rotation. Returns the replacement node." + (declare (type tree-node node)) + (let* ((right (node-right node)) + (rright (and right (node-right right)))) + (when (and rright (= (node-level node) (node-level rright))) + (shiftf (node-right node) (node-left right) node right) + (incf (node-level node))) + node)) + +(defun get-tree-stack (tree) + "Return the current stack for the TREE. This is used to remember the path + taken during a search in tree, so we can fix it up afterwards. Keeping + just one stack for the tree saves on consing; it's not safe to do + simultaneous destructive operations on a tree anyway, so this is a + reasonable thing to do. This function ensures that the stack attached to + the tree is actually large enough before returning it." + (declare (type aa-tree tree)) + (let* ((root (tree-root tree)) + (want (* 4 (+ (if root (node-level root) 0) 2))) + (stack (tree-stack tree)) + (size (array-dimension (tree-stack tree) 0))) + (if (>= size want) + stack + (do ((need (ash size 1) (ash need 1))) + ((>= need want) (setf (tree-stack tree) (make-array need))))))) + +(defun getaa (tree key &optional default) + "Look up the given KEY in an Andersson TREE; if the KEY was found, return + the corresponding data and t, otherwise return DEFAULT and nil." + (declare (type aa-tree tree)) + (let ((key< (tree-key< tree)) + (node (tree-root tree)) + (candidate nil) + (candidate-key nil)) + (declare (type (function (t t) t) key<) + (type (or null tree-node) node candidate)) + (flet ((key< (x y) + (funcall key< x y))) + (declare (inline key<)) + (loop (cond (node + (let ((node-key (node-key node))) + (if (key< key node-key) + (setf node (node-left node)) + (setf candidate node + candidate-key node-key + node (node-right node))))) + ((and candidate (not (key< candidate-key key))) + (return (values (node-data candidate) t))) + (t + (return (values default nil)))))))) + +(defun tree-probe (tree key) + "Do a search in an Andersson TREE for the KEY, returning three values. The + second and third are a stack of alternating nodes and direction bits, and + a stack pointer (empty, ascending), which together describe the path from + the tree root to the successor of the sought-for node. The first is + either the sought-for node itself, or nil if it wasn't there." + (declare (type aa-tree tree)) + (let ((key< (tree-key< tree)) + (stack (get-tree-stack tree)) + (sp 0) + (candidate nil) + (candidate-key nil)) + (declare (type (function (t t) t) key<) + (type simple-vector stack) + (type stack-pointer sp) + (type (or null tree-node) candidate)) + (flet ((pathpush (v i) + (setf (svref stack sp) v + (svref stack (1+ sp)) i) + (incf sp 2)) + (key< (x y) + (funcall key< x y))) + (declare (inline pathpush key<)) + (let ((node (tree-root tree))) + (loop (when (null node) + (return)) + (let* ((node-key (node-key node)) + (dir (cond ((key< key node-key) 0) + (t (setf candidate node + candidate-key node-key) + 1)))) + (pathpush node dir) + (setf node (svref node dir))))) + (values (if (and candidate (not (key< candidate-key key))) + candidate + nil) + stack + sp)))) + +(defun fixup-insert (tree stack sp node) + "TREE is an Andersson tree, STACK and SP are the values from a failed call + to tree-probe, and NODE is a newly-created node. Insert the NODE into + the tree, fix up its balance." + (declare (type aa-tree tree) + (type simple-vector stack) + (type stack-pointer sp) + (type tree-node node)) + (loop (when (zerop sp) + (return)) + (decf sp 2) + (let ((parent (svref stack sp)) + (dir (svref stack (1+ sp)))) + (setf (svref parent dir) node + node parent)) + (setf node (split (skew node)))) + (setf (tree-root tree) node)) + +(defun (setf getaa) (data tree key &optional ignore) + "Inserts a new node with the given KEY into an Andersson TREE, if there + wasn't one already. Returns two values: the requested node, and either t + if the node was inserted, or nil if it was already there." + (declare (type aa-tree tree) + (ignore ignore)) + (multiple-value-bind (node stack sp) (tree-probe tree key) + (cond (node (setf (node-data node) data)) + (t (fixup-insert tree stack sp (make-tree-node key data)) data)))) + +(defun updateaa (tree key func) + "Search TREE for an item with the given KEY. If it was found, call FUNC + with arguments of the node's data and t, and store its result as the + node's new data. If it was absent, call FUNC with arguments nil and nil, + and make a new node with the KEY and return value. The FUNC can escape to + prevent the node being created (though this is probably not useful)." + (declare (type aa-tree tree)) + (multiple-value-bind (node stack sp) (tree-probe tree key) + (cond (node (setf (node-data node) (funcall func (node-data node) t))) + (t (let ((data (funcall func nil nil))) + (fixup-insert tree stack sp (make-tree-node key data)) + data))))) + +(defun remaa (tree key) + "Deletes the node with the given KEY from an Andersson TREE. Returns t if + the node was found and deleted, or nil if it wasn't there to begin with." + (declare (type aa-tree tree)) + (multiple-value-bind (candidate stack sp) (tree-probe tree key) + (when candidate + (decf sp 2) + (let ((node (svref stack sp))) + + ;; Unsplice the candidate node from the tree, leaving node as its + ;; replacement. + (if (eq candidate node) + (setf node nil) + (setf (node-key candidate) (node-key node) + (node-data candidate) (node-data node) + node (node-right node))) + + ;; Now wander back up the tree, fixing it as we go. + (loop (when (zerop sp) + (return)) + (decf sp 2) + (let ((parent (svref stack sp)) + (dir (svref stack (1+ sp)))) + (setf (svref parent dir) node + node parent)) + + ;; If there's a level difference between this node and its + ;; children, bring it (and, if it exists, its right + ;; counterpart) down one level. + (let ((level-1 (1- (node-level node))) + (left (node-left node)) + (right (node-right node))) + (when (flet ((level (node) + (if node (node-level node) -1))) + (declare (inline level)) + (or (< (level left) level-1) + (< (level right) level-1))) + (setf (node-level node) level-1) + (when (and right (> (node-level right) level-1)) + (setf (node-level right) level-1)) + + ;; Now we must fix up the balancing rules. Apparently + ;; three skews and two splits suffice. + (setf node (skew node)) + (let ((right (node-right node))) + (when right + (setf right (skew right) + (node-right node) right) + (let ((rright (node-right right))) + (when rright + (setf (node-right right) (skew rright)))))) + (setf node (split node)) + (let ((right (node-right node))) + (when right (setf (node-right node) (split right))))))) + + ;; Store the new root. + (setf (tree-root tree) node))))) + +(defun aa-tree-iterator (tree) + "Returns a tree iterator function for TREE. The function returns three + values. For each node in the tree, it returns t, the key and the value; + then, it returns nil three times." + (let ((root (tree-root tree))) + (if (null root) + (lambda () (values nil nil nil)) + (let ((stack (make-array (* 2 (1+ (node-level root))))) + (sp 0)) + (flet ((pushleft (node) + (do ((node node (node-left node))) + ((null node)) + (setf (svref stack sp) node) + (incf sp)))) + (pushleft root) + (lambda () + (cond ((zerop sp) (values nil nil nil)) + (t (let ((node (svref stack (decf sp)))) + (pushleft (node-right node)) + (values t (node-key node) (node-data node))))))))))) + +(defun mapaa (func tree) + "Apply FUNC to each key and value in the TREE." + (labels ((walk (node) + (when node + (walk (node-left node)) + (funcall func (node-key node) (node-data node)) + (walk (node-right node))))) + (walk (tree-root tree)) + nil)) + +(defmacro doaa ((key value tree &optional result) &body body) + "Iterate over the items of TREE; for each one, bind KEY to its key and + VALUE to the associated data, and evaluate BODY, which is an implicit + tagbody. Finally, return RESULT. Either KEY or VALUE (or both!) may be + nil to indicate `don't care'." + (with-parsed-body (body decls) body + (let ((ignores nil)) + (unless key (setf key (gensym "KEY")) (push key ignores)) + (unless value (setf value (gensym "VALUE")) (push value ignores)) + `(block nil + (mapaa (lambda (,key ,value) + ,@decls + ,@(and ignores `((declare (ignore ,@ignores)))) + (tagbody ,@body)) + ,tree) + ,result)))) + +;;;-------------------------------------------------------------------------- +;;; Testing. + +#+debug +(defun tree-print (tree &optional (stream *standard-output*)) + "Print a TREE to an output STREAM in a comprehesible way." + (labels ((walk (depth node) + (when node + (walk (1+ depth) (node-left node)) + (format stream "~v@T~A: ~S => ~S~%" + (* depth 2) + (node-level node) + (node-key node) + (node-data node)) + (walk (1+ depth) (node-right node))))) + (walk 0 (tree-root tree)))) + +(defun tree-build (key< &rest items) + "Return a new tree sorted according to KEY<, containing the given ITEMS." + (let ((tree (make-aa-tree key<))) + (dolist (item items) + (setf (getaa tree item) nil)) + tree)) + +#+debug +(defun test-iterator (tree) + (let ((iter (aa-tree-iterator tree))) + (mapaa (lambda (key value) + (multiple-value-bind (iwin ikey ivalue) (funcall iter) + (assert (and iwin + (eql key ikey) + (eql value ivalue))))) + tree) + (assert (null (nth-value 0 (funcall iter)))))) + +#+debug +(defun tree-check (tree) + "Checks the invariants on a TREE." + (let ((key< (tree-key< tree))) + (labels ((check (node) + (if (null node) + (values nil nil) + (let ((key (node-key node)) + (level (node-level node)) + (left (node-left node)) + (right (node-right node))) + (multiple-value-bind (lmin lmax) (check left) + (multiple-value-bind (rmin rmax) (check right) + (assert (or (null lmax) (funcall key< lmax key))) + (assert (or (null rmin) (funcall key< key rmin))) + (assert (if (null left) + (= level 0) + (= (node-level left) (- level 1)))) + (assert (if (null right) + (= level 0) + (let ((rright (node-right right))) + (or (= (node-level right) (- level 1)) + (and (= (node-level right) level) + (or (null rright) + (= (node-level rright) + (- level 1)))))))) + (values (or lmin key) (or rmax key)))))))) + (check (tree-root tree))))) + +#+debug +(defun test (&key (state (make-random-state)) + (count nil) + (items nil) + (verbose 1)) + (let ((in (make-array 0 :element-type 'string + :adjustable t :fill-pointer 0)) + (out (make-array 0 :element-type 'string + :adjustable t :fill-pointer 0)) + (tree (make-aa-tree #'string<))) + + ;; Slurp in the word list + (with-open-file (dict #p"/usr/share/dict/words") + (loop for line = (read-line dict nil) + while (and line (not (eql items 0))) + do (vector-push-extend line out) + when items do (decf items))) + + (labels ((add (v w) + (vector-push-extend w v)) + (rm (v i) + (let ((n (1- (length v)))) + (setf (aref v i) (aref v n)) + (decf (fill-pointer v)))) + (insert () + (let* ((i (random (length out) state)) + (w (aref out i))) + (setf (getaa tree w) nil) + (rm out i) + (add in w) + (when (>= verbose 2) (format t "insert ~A~%" w)))) + (remove () + (let* ((i (random (length in) state)) + (w (aref in i))) + (remaa tree w) + (rm in i) + (add out w) + (when (>= verbose 2) (format t "remove ~A~%" w)))) + (check () + (when (>= verbose 2) (format t "check...~%")) + (tree-check tree) + (sort in #'string<) + (loop with i = (aa-tree-iterator tree) + for w across in + for (win key value) = (multiple-value-list (funcall i)) + do (assert (eq w (and win key))) + while w + finally (assert (null (nth-value 0 (funcall i))))))) + (loop with prob = (if count (/ count 100) 1000) + until (eql count 0) + when count do (decf count) + do (case (random prob state) + (0 (check) (when (= verbose 1) (write-char #\?))) + (t (if (< (random (+ (length in) (length out)) state) + (length out)) + (progn (insert) + (when (= verbose 1) (write-char #\+))) + (progn (remove) + (when (= verbose 1) (write-char #\-)))))) + do (force-output) + finally (check))))) + +;;;----- That's all, folks -------------------------------------------------- diff --git a/mdw.asd b/mdw.asd index 2c689ab..d9ed4c6 100644 --- a/mdw.asd +++ b/mdw.asd @@ -17,6 +17,7 @@ (:file "collect" :depends-on ("mdw-base")) #+cmu (:file "unix" :depends-on ("mdw-base" "collect")) (:file "safely" :depends-on ("mdw-base")) + (:file "aa-tree" :depends-on ("mdw-base")) (:file "infix") (:file "infix-ext" :depends-on ("mdw-base" "infix" -- 2.11.0