linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-mm@kvack.org
Cc: Christoph Lameter <clameter@engr.sgi.com>,
	"Paul E. McKenney" <paulmck@us.ibm.com>,
	Nick Piggin <npiggin@suse.de>, Ingo Molnar <mingo@elte.hu>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 1/5] RCU friendly B+tree
Date: Tue, 06 Mar 2007 02:38:16 +0100	[thread overview]
Message-ID: <20070306014211.048104000@taijtu.programming.kicks-ass.net> (raw)
In-Reply-To: <20070306013815.951032000@taijtu.programming.kicks-ass.net>

[-- Attachment #1: btree.patch --]
[-- Type: text/plain, Size: 27934 bytes --]

A RCU friendly B+tree

TODO:
 - review memory barriers
 - at least rewrite search using an iterative approach
 - more documentation

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/btree.h |  198 ++++++++++
 lib/Makefile          |    2 
 lib/btree.c           |  924 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1123 insertions(+), 1 deletion(-)

Index: linux-2.6/include/linux/btree.h
===================================================================
--- /dev/null
+++ linux-2.6/include/linux/btree.h
@@ -0,0 +1,198 @@
+#ifndef _LINUX_BTREE_H
+#define _LINUX_BTREE_H
+
+#define BTREE_DEBUG	1
+
+#include <linux/mempool.h>
+#include <linux/log2.h>
+#include <linux/rcupdate.h>
+#include <asm/cache.h>
+#include <asm/bitops.h>
+
+struct btree_item {
+	unsigned long key;
+	union {
+		struct btree_node *child;
+		void *data;
+	};
+};
+
+#define BTREE_ITEM_CACHELINE (L1_CACHE_BYTES / sizeof(struct btree_item))
+#define BTREE_ITEM_MAX (BTREE_ITEM_CACHELINE < 16 ? 16 : BTREE_ITEM_CACHELINE)
+#define BTREE_ITEM_HALF (BTREE_ITEM_MAX/2)
+
+struct btree_node {
+	struct btree_item item[BTREE_ITEM_MAX];
+};
+
+/*
+ * Max number of nodes to be RCUed per modification.
+ * Assumes BTREE_ITEM_MAX is power of two.
+ */
+#define BTREE_NODE_REPLACE \
+	(2 * (1 + DIV_ROUND_UP(BITS_PER_LONG, ilog2(BTREE_ITEM_MAX))))
+
+struct btree_freevec {
+	struct rcu_head rcu_head;
+	int pos;
+	void *slot[BTREE_NODE_REPLACE];
+};
+
+struct btree_root {
+	struct btree_node *root;
+	int height;
+	mempool_t *mempool;
+	struct btree_freevec *freevec;
+	gfp_t gfp_mask;
+	void (*flush)(struct btree_freevec *);
+};
+
+#ifdef BTREE_DEBUG
+void btree_print(struct btree_root *);
+#else
+#define btree_print(root) do { } while (0)
+#endif
+
+static inline int btree_item_leaf(struct btree_item *itemp)
+{
+	return !test_bit(0, (void *)&itemp->child);
+}
+
+static inline struct btree_node *btree_item_deref(struct btree_item *itemp)
+{
+	struct btree_node *child = rcu_dereference(itemp->child);
+	__clear_bit(0, (void *)&child);
+	return child;
+}
+
+extern struct btree_item *__btree_search(struct btree_root *,
+		struct btree_node *, unsigned long, int, int *);
+
+extern struct btree_item *__btree_search_next(struct btree_root *,
+		struct btree_node *, struct btree_item **,
+		unsigned long, int, int *);
+
+static inline void *btree_lookup(struct btree_root *root, unsigned long index)
+{
+	int retry = 0;
+	struct btree_item *item;
+
+
+	if (!root->root)
+		return NULL;
+
+	item = __btree_search(root, root->root, index,
+			root->height - 1, &retry);
+
+	if (retry || item->key != index)
+		return NULL;
+
+	return item->data;
+}
+
+static inline void *btree_lookup_next(struct btree_root *root,
+		unsigned long index, void **nextp)
+{
+	int retry = 0;
+	struct btree_item *next = NULL, *item;
+
+	*nextp = NULL;
+	if (!root->root)
+		return NULL;
+
+	item = __btree_search_next(root, root->root, &next,
+			index, root->height - 1, &retry);
+
+	switch (retry) {
+		case -1:
+			*nextp = next->data;
+			return NULL;
+
+		case 0:
+			*nextp = next->data;
+			/* fall through */
+		case 1:
+			if (item->key != index)
+				return NULL;
+			break;
+	}
+
+	return item->data;
+}
+
+static inline void *btree_stab(struct btree_root *root, unsigned long index)
+{
+	int retry = 0;
+	struct btree_item *item;
+
+
+	if (!root->root)
+		return NULL;
+
+	item = __btree_search(root, root->root, index,
+			root->height - 1, &retry);
+
+	if (retry)
+		return NULL;
+
+	return item->data;
+}
+
+static inline void *btree_stab_next(struct btree_root *root,
+		unsigned long index, void **nextp)
+{
+	int retry = 0;
+	struct btree_item *next = NULL, *item;
+
+	*nextp = NULL;
+	if (!root->root)
+		return NULL;
+
+	item = __btree_search_next(root, root->root, &next,
+			index, root->height - 1, &retry);
+
+	switch (retry) {
+		case -1:
+			*nextp = next->data;
+			return NULL;
+
+		case 0:
+			*nextp = next->data;
+			/* fall through */
+		case 1:
+			break;
+	}
+
+	if (!item) {
+		printk(KERN_DEBUG "stab_next: %lu next: %p\n", index, *nextp);
+		btree_print(root);
+		BUG();
+	}
+
+	return item->data;
+}
+
+extern int btree_preload(struct btree_root *, gfp_t);
+extern int btree_insert(struct btree_root *, unsigned long, void *);
+extern int btree_update(struct btree_root *, unsigned long, unsigned long);
+extern void *btree_remove(struct btree_root *, unsigned long);
+
+extern void btree_root_init(struct btree_root *, gfp_t);
+extern void btree_root_destroy(struct btree_root *);
+
+extern void btree_freevec_flush(struct btree_freevec *);
+
+#define BTREE_INIT_FLUSH(gfp, f) (struct btree_root){ 	\
+	.root = NULL, 					\
+	.height = 0,					\
+	.mempool = NULL,				\
+	.freevec = NULL,				\
+	.gfp_mask = (gfp),				\
+	.flush = (f),					\
+}
+
+#define BTREE_INIT(gfp) BTREE_INIT_FLUSH(gfp, btree_freevec_flush)
+
+extern void __init btree_init(void);
+
+#endif /* _LINUX_BTREE_H */
Index: linux-2.6/lib/Makefile
===================================================================
--- linux-2.6.orig/lib/Makefile
+++ linux-2.6/lib/Makefile
@@ -5,7 +5,7 @@
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
 	 idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
-	 sha1.o irq_regs.o reciprocal_div.o
+	 sha1.o irq_regs.o reciprocal_div.o btree.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
Index: linux-2.6/lib/btree.c
===================================================================
--- /dev/null
+++ linux-2.6/lib/btree.c
@@ -0,0 +1,924 @@
+/*
+ * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
+ * GPLv2
+ *
+ * Something that started out as an RCU friendly B+tree.
+ *
+ * Contrairy to most B-trees the nodes have an even number of slots.  This can
+ * be done because we carry all the children in the leafs and thus we don't
+ * need to push one up on a split.  We can just duplicate the first.
+ *
+ * The inner nodes are basically an N-way space partition.
+ *
+ * Another difference to the typical B+tree is that this implementation does
+ * not thread the leafs, this is left up to the user if so desired.
+ *
+ *
+ *   [------------------------------>[------------------------------>
+ *   |                               |
+ *   [------------------->[--------->[-------------->[-------------->
+ *   |                    |          |               |
+ *   [..) [..) [..) [..)  [..) [..)  [..) [..) [..)  [..) [..) [..)
+ *
+ *
+ */
+#include <linux/btree.h>
+#include <linux/slab.h>
+#include <linux/log2.h>
+#include <linux/err.h>
+
+#ifdef BTREE_DEBUG
+void btree_print(struct btree_root *root);
+int btree_validate(struct btree_root *root);
+#else
+#define btree_print(root) do { } while (0)
+#define btree_validate(root) (0)
+#endif
+
+static struct kmem_cache *btree_cachep;
+
+static inline void btree_free(struct btree_root *root, void *ptr)
+{
+	root->freevec->slot[root->freevec->pos++] = ptr;
+}
+
+static inline void btree_ptr_flip(struct btree_root *root,
+		struct btree_node **nodep, struct btree_node *new)
+{
+	struct btree_node *old = rcu_dereference(*nodep);
+	rcu_assign_pointer(*nodep, new);
+	__clear_bit(0, (void *)&old);
+	btree_free(root, old);
+}
+
+static inline struct btree_node *btree_node_child(struct btree_node *nodep)
+{
+	__set_bit(0, (void *)&nodep);
+	return nodep;
+}
+
+static inline void btree_item_flip(struct btree_root *root,
+		struct btree_item *itemp, struct btree_node *new)
+{
+	btree_ptr_flip(root, &itemp->child, btree_node_child(new));
+	/* possibly tighten */
+	if (itemp->key != new->item[0].key) {
+		/*
+		 * we can only tighten when the new node is linked in
+		 */
+		smp_wmb();
+		itemp->key = new->item[0].key;
+	}
+}
+
+static inline void btree_item_free(struct btree_root *root,
+		struct btree_item *itemp)
+{
+	if (!btree_item_leaf(itemp))
+		btree_free(root, btree_item_deref(itemp));
+}
+
+int btree_preload(struct btree_root *root, gfp_t gfp_mask)
+{
+	int ret = 0;
+
+	if (!root->mempool) {
+		root->mempool = mempool_create_slab_pool(0, btree_cachep);
+		if (!root->mempool)
+			return -ENOMEM;
+	}
+
+	if (!root->freevec) {
+		root->freevec = kzalloc(sizeof(struct btree_freevec), gfp_mask);
+		if (!root->freevec)
+			return -ENOMEM;
+	}
+
+	ret = mempool_resize(root->mempool, 1+2*root->height, gfp_mask);
+
+	return ret;
+}
+
+static int btree_mod_init(struct btree_root *root)
+{
+	return btree_preload(root, root->gfp_mask);
+}
+
+static int btree_mod_finish(struct btree_root *root, unsigned long index)
+{
+	int ret = 0;
+	if (root->mempool)
+		ret = mempool_resize(root->mempool, 1, root->gfp_mask);
+
+	if (btree_validate(root)) {
+		printk(KERN_DEBUG "modified: %lu\n", index);
+		btree_print(root);
+		BUG();
+	}
+
+	root->flush(root->freevec);
+	root->freevec = NULL;
+
+	return ret;
+}
+
+/* ------ search ------- */
+
+/*
+ * Since we can re-adjust the space partitioning in delete and update a
+ * lockless lookup might hit a wrong branch once in a while, hence we might
+ * need to backtrack.
+ *
+ * Because we do a search for a less than or equal index we can only backtrack.
+ * That is a split might over estimate (too loose) but never under estimate
+ * (too tight).  Thus we can loosen on our way down, but have to tighten on
+ * our way up.  See update and remove.
+ *
+ * NOTE the backtrack should be limited to one entry so worst time is:
+ *   O(2*log(n)-1)
+ */
+
+/*
+ * find which branch to take
+ */
+static inline
+int __btree_item_search(struct btree_root *root,
+		struct btree_node *node, unsigned long index)
+{
+	int i;
+	/*
+	 * match for the wmb in btree_item_flip() (??)
+	 */
+	smp_rmb();
+	for (i = 1; i < BTREE_ITEM_MAX; i++)
+		if (node->item[i].child == NULL ||
+				index < node->item[i].key)
+			break;
+	i--;
+	return i;
+}
+
+/*
+ * find the item with a lesser or equal index
+ */
+struct btree_item *__btree_search(struct btree_root *root,
+		struct btree_node *node, unsigned long index, int height,
+		int *retryp)
+{
+	struct btree_item *ret;
+	int i, retry;
+
+	i = __btree_item_search(root, node, index);
+
+	if (height == 0) {
+		retry = (node->item[i].key <= index) ? 0 : -1;
+		if (unlikely(retry)) {
+			i += retry;
+			if (i < 0) {
+				*retryp = retry;
+				return NULL;
+			}
+		}
+		return &node->item[i];
+	}
+
+again:
+	retry = 0;
+	ret = __btree_search(root, btree_item_deref(&node->item[i]),
+			index, height - 1, &retry);
+
+	if (unlikely(retry)) {
+		i += retry;
+		if (i < 0) {
+			*retryp = retry;
+			return NULL;
+		}
+		goto again;
+	}
+
+	return ret;
+}
+
+/*
+ * get the first item for a sub-tree
+ */
+static
+struct btree_item *__btree_search_first(struct btree_root *root,
+		struct btree_node *node, int height)
+{
+	if (height == 0)
+		return &node->item[0];
+
+	return __btree_search_first(root, btree_item_deref(&node->item[0]),
+			height - 1);
+}
+
+/*
+ * find the item with a lesser or equal index and the next item.
+ */
+struct btree_item *__btree_search_next(struct btree_root *root,
+		struct btree_node *node, struct btree_item **nextp,
+		unsigned long index, int height, int *retryp)
+{
+	struct btree_item *ret;
+	int i, retry;
+
+	i = __btree_item_search(root, node, index);
+
+	if (height == 0) {
+		retry = (node->item[i].key <= index) ? 0 : -1;
+		if (retry) {
+			*nextp = &node->item[i];
+			i += retry;
+			if (i < 0) {
+				*retryp = retry;
+				return NULL;
+			}
+		} else if (!*nextp) {
+			if ((i + 1) >= BTREE_ITEM_MAX || !node->item[i+1].data)
+				*retryp = 1;
+			else
+				*nextp = &node->item[i+1];
+		}
+		return &node->item[i];
+	}
+
+again:
+	retry = 0;
+	ret = __btree_search_next(root,
+			btree_item_deref(&node->item[i]),
+			nextp, index, height - 1, &retry);
+
+	i += retry;
+	switch (retry) {
+	case -1:
+		if (i < 0) {
+			*retryp = retry;
+			return NULL;
+		}
+		goto again;
+
+	case 1:
+		if (i >= BTREE_ITEM_MAX || !node->item[i].child)
+			*retryp = retry;
+		else
+			*nextp = __btree_search_first(root,
+					btree_item_deref(&node->item[i]),
+					height - 1);
+		break;
+
+	case 0:
+		break;
+	}
+
+	return ret;
+}
+
+/* ------ insert ------- */
+
+/*
+ * recusrive insert item; split nodes when needed
+ */
+static int __btree_insert(struct btree_root *root,
+		struct btree_node **nodep, struct btree_node **splitp,
+		struct btree_item *itemp, int height)
+{
+	struct btree_node *node = *nodep;
+	struct btree_node *new, *split;
+	struct btree_node *update = NULL;
+	int i, j, ret;
+
+	i = __btree_item_search(root, node, itemp->key);
+
+	if (height == 0) {
+		if (node->item[i].child && node->item[i].key == itemp->key)
+			return -EEXIST;
+
+		if (node->item[i].child && node->item[i].key < itemp->key)
+			i++; /* insert after */
+	} else {
+		struct btree_node *child = btree_item_deref(&node->item[i]);
+		new = child;
+		split = NULL;
+		ret = __btree_insert(root, &new, &split, itemp, height - 1);
+		if (ret)
+			return ret;
+
+		if (new == child)
+			return 0;
+
+		if (split == NULL) {
+			btree_item_flip(root, &node->item[i], new);
+			return 0;
+		}
+		update = new;
+
+		i++; /* insert after */
+	}
+
+	new = mempool_alloc(root->mempool, root->gfp_mask);
+	memset(new, 0, sizeof(struct btree_node));
+	/* insert has room */
+	if (node->item[BTREE_ITEM_MAX-1].data == NULL) {
+		for (j = 0; j < i; j++)
+			new->item[j] = node->item[j];
+		new->item[j] = *itemp;
+		for (; j < BTREE_ITEM_MAX-1; j++)
+			new->item[j+1] = node->item[j];
+
+		if (update)
+			btree_item_flip(root, &new->item[i-1], update);
+
+		*nodep = new;
+		return 0;
+	}
+
+	split = mempool_alloc(root->mempool, root->gfp_mask);
+	memset(split, 0, sizeof(struct btree_node));
+	/* insert overflows - split */
+	if (i < BTREE_ITEM_HALF) {
+		for (j = 0; j < i; j++)
+			new->item[j] = node->item[j];
+		new->item[j] = *itemp;
+		for (; j < BTREE_ITEM_HALF; j++)
+			new->item[j+1] = node->item[j];
+
+		for (; j < BTREE_ITEM_MAX; j++)
+			split->item[j - BTREE_ITEM_HALF] = node->item[j];
+	} else {
+		for (j = 0; j < BTREE_ITEM_HALF; j++)
+			new->item[j] = node->item[j];
+
+		for (; j < i; j++)
+			split->item[j - BTREE_ITEM_HALF] = node->item[j];
+		split->item[j - BTREE_ITEM_HALF] = *itemp;
+		for (; j < BTREE_ITEM_MAX; j++)
+			split->item[j + 1 - BTREE_ITEM_HALF] = node->item[j];
+	}
+
+	if (update) {
+		if (i-1 < BTREE_ITEM_HALF)
+			btree_item_flip(root, &new->item[i-1], update);
+		else
+			btree_item_flip(root, &split->item[i-1-BTREE_ITEM_HALF], update);
+	}
+
+	*nodep = new;
+	*splitp = split;
+	*itemp = (struct btree_item){
+		.key = split->item[0].key,
+		{ .child = btree_node_child(split) }
+	};
+	return 0;
+}
+
+/*
+ * handle root updates
+ */
+int btree_insert(struct btree_root *root, unsigned long index, void *data)
+{
+	struct btree_item item = (struct btree_item){
+		.key = index,
+		{ .data = data }
+	};
+	struct btree_node *new, *node, *split = NULL;
+	int ret = btree_mod_init(root);
+	if (ret)
+		return ret;
+
+	if (!root->root) {
+		root->root = mempool_alloc(root->mempool, root->gfp_mask);
+		memset(root->root, 0, sizeof(struct btree_node));
+		root->height = 1;
+	}
+
+	node = root->root;
+	ret = __btree_insert(root, &node, &split, &item, root->height - 1);
+	if (ret)
+		goto out;
+
+	if (node == root->root)
+		goto out;
+
+	if (split == NULL) {
+		btree_ptr_flip(root, &root->root, node);
+		goto out;
+	}
+
+	new = mempool_alloc(root->mempool, root->gfp_mask);
+	memset(new, 0, sizeof(struct btree_node));
+	new->item[0] = (struct btree_item){
+		.key = node->item[0].key,
+		{ .child = btree_node_child(node) }
+	};
+	new->item[1] = (struct btree_item){
+		.key = split->item[0].key,
+		{ .child = btree_node_child(split) }
+	};
+	root->height++;
+
+	btree_ptr_flip(root, &root->root, new);
+
+out:
+	btree_mod_finish(root, index);
+	return ret;
+}
+
+/* ------ update ------- */
+
+/*
+ * update the index of an item
+ *
+ * be careful the range:
+ *   [min(index, new_index), max(index, new_index]
+ * _MUST_ be free, otherwise remove and reinsert.
+ *
+ * see the search comment for lockless lookup constraints
+ */
+static int __btree_update(struct btree_root *root, struct btree_node *node,
+		unsigned long index, unsigned long new_index, int height)
+{
+	struct btree_node *child;
+	unsigned long key;
+	int i, ret;
+
+	i = __btree_item_search(root, node, index);
+	key = node->item[i].key;
+
+	if (height == 0) {
+		if (key != index)
+			return -EINVAL;
+		node->item[i].key = new_index;
+		return 0;
+	}
+
+	child = btree_item_deref(&node->item[i]);
+	/* loosen downwards */
+	if (index > new_index && key == index)
+		key = node->item[i].key = new_index;
+	ret = __btree_update(root, child, index, new_index, height - 1);
+	/* undo on error */
+	if (ret && index > new_index && key == new_index)
+		node->item[i].key = index;
+	/* tighten upwards */
+	if (!ret && index < new_index && key == index)
+		node->item[i].key = new_index;
+
+	return ret;
+}
+
+int btree_update(struct btree_root *root, unsigned long index, unsigned long new_index)
+{
+	int ret;
+
+	if (!root->root)
+		return 0;
+	if (index == new_index)
+		return 0;
+	ret = __btree_update(root, root->root, index, new_index,
+			root->height - 1);
+	if (btree_validate(root)) {
+		printk(KERN_DEBUG "btree_update: index: %lu new_index: %lu\n",
+				index, new_index);
+		btree_print(root);
+		BUG();
+	}
+	return ret;
+}
+
+/* ------ delete ------- */
+
+/*
+ * delete an item; borrow items or merge nodes when needed.
+ */
+static void *__btree_remove(struct btree_root *root,
+		struct btree_node **leftp,
+		struct btree_node **nodep,
+		struct btree_node **rightp,
+		unsigned long index, int height)
+{
+	struct btree_node *node = *nodep;
+	struct btree_node *new;
+	struct btree_node *update = NULL;
+	int i, j, k, n;
+	void *ret = NULL;
+
+	i = __btree_item_search(root, node, index);
+
+	if (height == 0) {
+		if (node->item[i].key != index) {
+			ret = ERR_PTR(-EINVAL);
+			goto done;
+		}
+		ret = node->item[i].data;
+	} else {
+		struct btree_node *oleft,  *left = NULL;
+		struct btree_node *ochild, *child;
+		struct btree_node *oright, *right = NULL;
+
+		if (i > 0)
+			left = btree_item_deref(&node->item[i-1]);
+		child = btree_item_deref(&node->item[i]);
+		if (i + 1 < BTREE_ITEM_MAX)
+			right = btree_item_deref(&node->item[i+1]);
+
+		oleft = left; ochild = child; oright = right;
+
+		ret = __btree_remove(root, &left, &child, &right,
+				index, height - 1);
+
+		/* no change */
+		if (child == ochild) {
+			/* possibly tighten the split on our way back up */
+			if (node->item[i].key != child->item[0].key)
+				node->item[i].key = child->item[0].key;
+			goto done;
+		}
+
+		/* single change */
+		if (left == oleft && right == oright) {
+			btree_item_flip(root, &node->item[i], child);
+			goto done;
+		}
+
+		/* borrowed left  */
+		if (oleft && left && left != oleft && child) {
+			new = mempool_alloc(root->mempool, root->gfp_mask);
+			memset(new, 0, sizeof(struct btree_node));
+			for (j = 0; j < i-1; j++)
+				new->item[j] = node->item[j];
+			new->item[j++] = (struct btree_item){
+				.key = left->item[0].key,
+				{ .child = btree_node_child(left) }
+			};
+			new->item[j++] = (struct btree_item){
+				.key = child->item[0].key,
+				{ .child = btree_node_child(child) }
+			};
+			for (; j < BTREE_ITEM_MAX; j++)
+				new->item[j] = node->item[j];
+
+			btree_item_free(root, &node->item[i-1]);
+			btree_item_free(root, &node->item[i]);
+
+			*nodep = new;
+			goto done;
+		}
+
+		/* borrowed right */
+		if (oright && right && right != oright && child) {
+			new = mempool_alloc(root->mempool, root->gfp_mask);
+			memset(new, 0, sizeof(struct btree_node));
+			for (j = 0; j < i; j++)
+				new->item[j] = node->item[j];
+			new->item[j++] = (struct btree_item){
+				.key = child->item[0].key,
+				{ .child = btree_node_child(child) }
+			};
+			new->item[j++] = (struct btree_item){
+				.key = right->item[0].key,
+				{ .child = btree_node_child(right) }
+			};
+			for (; j < BTREE_ITEM_MAX; j++)
+				new->item[j] = node->item[j];
+
+			btree_item_free(root, &node->item[i]);
+			btree_item_free(root, &node->item[i+1]);
+
+			*nodep = new;
+			goto done;
+		}
+
+		/* merged left */
+		if (!child)
+			update = left;
+
+		/* merged right */
+		else if (oright && !right) {
+			update = child;
+			i++;
+		}
+	}
+
+	/* delete */
+	new = mempool_alloc(root->mempool, root->gfp_mask);
+	memset(new, 0, sizeof(struct btree_node));
+	if (node->item[BTREE_ITEM_HALF].child) {
+delete_one:
+		for (j = 0; j < i; j++)
+			new->item[j] = node->item[j];
+		for (j++; j < BTREE_ITEM_MAX; j++)
+			new->item[j-1] = node->item[j];
+
+		if (update)
+			btree_item_flip(root, &new->item[i-1], update);
+
+		btree_item_free(root, &node->item[i]);
+
+		*nodep = new;
+		goto done;
+	}
+
+	/* delete underflows */
+
+	/* borrow left */
+	if (*leftp && (*leftp)->item[BTREE_ITEM_HALF].child) {
+		struct btree_node *new_left;
+
+		new_left = mempool_alloc(root->mempool, root->gfp_mask);
+		memset(new_left, 0, sizeof(struct btree_node));
+
+		for (j = 0; j < BTREE_ITEM_MAX && (*leftp)->item[j].child; j++)
+			;
+		n = (j-BTREE_ITEM_HALF+1)/2;
+
+		for (k = 0; k < j-n; k++)
+			new_left->item[k] = (*leftp)->item[k];
+		for (j = 0; j < n; j++)
+			new->item[j] = (*leftp)->item[k+j];
+		for (j = 0; j < i; j++)
+			new->item[n+j] = node->item[j];
+		for (j++; j < BTREE_ITEM_HALF; j++)
+			new->item[n+j-1] = node->item[j];
+
+		if (update)
+			btree_item_flip(root, &new->item[n+i-1], update);
+
+		btree_item_free(root, &node->item[i]);
+
+		*leftp = new_left;
+		*nodep = new;
+		goto done;
+	}
+
+	/* borrow right */
+	if (*rightp && (*rightp)->item[BTREE_ITEM_HALF].child) {
+		struct btree_node *new_right;
+
+		new_right = mempool_alloc(root->mempool, root->gfp_mask);
+		memset(new_right, 0, sizeof(struct btree_node));
+
+		for (j = 0; j < BTREE_ITEM_MAX && (*rightp)->item[j].child ; j++)
+			;
+		n = (j-BTREE_ITEM_HALF+1)/2;
+
+		for (k = 0; k < i; k++)
+			new->item[k] = node->item[k];
+		for (k++; k < BTREE_ITEM_HALF; k++)
+			new->item[k-1] = node->item[k];
+		for (j = 0; j < n; j++)
+			new->item[k-1+j] = (*rightp)->item[j];
+		for (; j < BTREE_ITEM_MAX; j++)
+			new_right->item[j-n] = (*rightp)->item[j];
+
+		if (update)
+			btree_item_flip(root, &new->item[i-1], update);
+
+		btree_item_free(root, &node->item[i]);
+
+		*nodep = new;
+		*rightp = new_right;
+		goto done;
+	}
+
+	/* merge left */
+	if (*leftp) {
+		for (j = 0; j < BTREE_ITEM_HALF; j++)
+			new->item[j] = (*leftp)->item[j];
+		for (k = 0; k < i; k++)
+			new->item[j+k] = node->item[k];
+		for (k++; k < BTREE_ITEM_HALF; k++)
+			new->item[j+k-1] = node->item[k];
+
+		if (update)
+			btree_item_flip(root, &new->item[j+i-1], update);
+
+		btree_item_free(root, &node->item[i]);
+
+		*leftp = new;
+		*nodep = NULL;
+		goto done;
+	}
+
+	/* merge right */
+	if (*rightp) {
+		for (j = 0; j < i; j++)
+			new->item[j] = node->item[j];
+		for (j++; node->item[j].child; j++)
+			new->item[j-1] = node->item[j];
+		for (k = 0; (*rightp)->item[k].child; k++)
+			new->item[j-1+k] = (*rightp)->item[k];
+
+		if (update)
+			btree_item_flip(root, &new->item[i-1], update);
+
+		btree_item_free(root, &node->item[i]);
+
+		*nodep = new;
+		*rightp = NULL;
+		goto done;
+	}
+
+	/* only the root may underflow */
+	BUG_ON(root->root != node);
+	goto delete_one;
+
+done:
+	return ret;
+}
+
+void *btree_remove(struct btree_root *root, unsigned long index)
+{
+	void *ret = NULL;
+	int err = btree_mod_init(root);
+	if (err)
+		return ERR_PTR(err);
+
+	if (root->root) {
+		struct btree_node *left = NULL, *right = NULL;
+		struct btree_node *node = root->root;
+
+		ret = __btree_remove(root,
+				&left, &node, &right,
+				index, root->height - 1);
+
+		if (node != root->root) {
+			btree_ptr_flip(root, &root->root, node);
+		}
+
+		if (!root->root->item[1].child &&
+				!btree_item_leaf(&root->root->item[0])) {
+			root->height--;
+			btree_ptr_flip(root, &root->root,
+					btree_item_deref(&root->root->item[0]));
+		}
+
+		if (!root->root->item[0].child) {
+			btree_ptr_flip(root, &root->root, NULL);
+			root->height = 0;
+		}
+	}
+
+	btree_mod_finish(root, index);
+	return ret;
+}
+
+/* ------ */
+
+void btree_root_init(struct btree_root *root, gfp_t gfp_mask)
+{
+	*root = BTREE_INIT(gfp_mask);
+}
+
+void btree_freevec_flush(struct btree_freevec *freevec)
+{
+	int i;
+	for (i = 0; i < freevec->pos; i++)
+		kmem_cache_free(btree_cachep, freevec->slot[i]);
+	kfree(freevec);
+}
+
+void btree_root_destroy(struct btree_root *root)
+{
+	/* assume the tree is empty */
+	BUG_ON(root->height);
+	BUG_ON(root->freevec);
+	if (root->mempool)
+		mempool_destroy(root->mempool);
+}
+
+void __init btree_init(void)
+{
+	btree_cachep = kmem_cache_create("btree_node",
+			sizeof(struct btree_node),
+			0, SLAB_HWCACHE_ALIGN,
+			NULL, NULL);
+}
+
+#ifdef BTREE_DEBUG
+static void __btree_node_print(struct btree_root *root,
+		struct btree_node *node, int recurse)
+{
+	int i, j;
+	for (i = 0; i < BTREE_ITEM_MAX; i++) {
+		if (node->item[i].child) {
+			printk(KERN_DEBUG);
+			for (j=0; j<recurse; j++)
+				printk(" ");
+			if (btree_item_leaf(&node->item[i])) {
+
+	printk("-> leaf: %p, item: %d, key: %lu, data: %p\n",
+		node, i, node->item[i].key, node->item[i].data);
+
+			} else {
+
+	printk("node: %p, item: %d, key: %lu, child: %p\n",
+		node, i, node->item[i].key, btree_item_deref(&node->item[i]));
+				if (recurse)
+	__btree_node_print(root, btree_item_deref(&node->item[i]), recurse+1);
+
+			}
+		}
+	}
+}
+
+void btree_node_print(struct btree_root *root, struct btree_node *node)
+{
+	printk(KERN_DEBUG "node: %p\n", node);
+	__btree_node_print(root, node, 0);
+}
+
+void btree_print(struct btree_root *root)
+{
+	printk(KERN_DEBUG "[] root: %p, height: %d\n",
+			root->root, root->height);
+	if (root->root)
+		__btree_node_print(root, root->root, 1);
+}
+
+static unsigned long __btree_key(struct btree_root *root,
+	       	struct btree_node *node, int height)
+{
+	if (height == 0)
+		return node->item[0].key;
+
+	return __btree_key(root, btree_item_deref(&node->item[0]), height - 1);
+}
+
+static int __btree_validate(struct btree_root *root, struct btree_node *node,
+		unsigned long *pindex, int height)
+{
+	unsigned long parent_key = *pindex;
+	unsigned long node_key = 0;
+	unsigned long child_key = 0;
+
+	int i;
+	unsigned long key;
+	int nr = 0;
+	int bug = 0;
+
+	for (i = 0; i < BTREE_ITEM_MAX; i++) {
+		struct btree_node *child = btree_item_deref(&node->item[i]);
+		if (!child)
+			continue;
+
+		nr++;
+
+		key = node->item[i].key;
+		if (key < parent_key || (!i && key != parent_key) ||
+				(i && key == parent_key)) {
+			printk(KERN_DEBUG
+	"wrong parent split: key: %lu parent_key: %lu index: %d\n",
+				       key, parent_key, i);
+			bug++;
+		}
+
+		if (key < node_key || (i && key == node_key)) {
+			printk(KERN_DEBUG
+	"wrong order: key: %lu node_key: %lu index: %d\n",
+				       key, node_key, i);
+			bug++;
+		}
+		node_key = key;
+
+		if (key < child_key || (i && key == child_key)) {
+			printk(KERN_DEBUG
+	"wrong child split: key: %lu child_key: %lu index: %d\n",
+					key, child_key, i);
+			bug++;
+		}
+
+		child_key = max(node_key, child_key);
+
+		if (height)
+			bug += __btree_validate(root, child,
+					&child_key, height - 1);
+
+		*pindex = max(node_key, child_key);
+	}
+
+	if (node != root->root && nr < BTREE_ITEM_HALF) {
+		printk(KERN_DEBUG "node short\n");
+		bug++;
+	}
+
+	if (bug) {
+		printk(KERN_DEBUG "bug in node: %p\n", node);
+	}
+
+	return bug;
+}
+
+int btree_validate(struct btree_root *root)
+{
+	unsigned long key;
+
+	if (root->root) {
+		key = __btree_key(root, root->root, root->height - 1);
+		return __btree_validate(root, root->root, &key,
+				root->height - 1);
+	}
+
+	return 0;
+}
+#endif

-- 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2007-03-06  1:38 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-03-06  1:38 [RFC][PATCH 0/5] Lockless vma lookups Peter Zijlstra
2007-03-06  1:38 ` Peter Zijlstra [this message]
2007-03-06  1:38 ` [RFC][PATCH 2/5] mm: use B+tree for vmas Peter Zijlstra
2007-03-06  1:38 ` [RFC][PATCH 3/5] mm: RCUify vma lookup Peter Zijlstra
2007-03-06  2:23   ` Nick Piggin
2007-03-06  8:36     ` Nick Piggin
2007-03-08 17:31       ` Ingo Molnar
2007-03-06 12:31     ` Peter Zijlstra
2007-03-06 12:35       ` Peter Zijlstra
2007-03-06  1:38 ` [RFC][PATCH 4/5] i386: lockless fault handler Peter Zijlstra
2007-03-06  1:38 ` [RFC][PATCH 5/5] x86_64: " Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070306014211.048104000@taijtu.programming.kicks-ass.net \
    --to=a.p.zijlstra@chello.nl \
    --cc=clameter@engr.sgi.com \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=npiggin@suse.de \
    --cc=paulmck@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox