Btrfs: Fixup the code to merge during path walks
Add a bulk insert/remove test to random-test
Add the quick-test code back as another regression test

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 58d4260..756b1cd 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -14,7 +14,7 @@
 	$(check) $<
 	$(CC) $(CFLAGS) -c $<
 
-all: tester debug-tree
+all: tester debug-tree quick-test
 
 debug-tree: $(objects) debug-tree.o
 	gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o
@@ -22,6 +22,9 @@
 tester: $(objects) random-test.o
 	gcc $(CFLAGS) -o tester $(objects) random-test.o
 
+quick-test: $(objects) quick-test.o
+	gcc $(CFLAGS) -o quick-test $(objects) quick-test.o
+
 $(objects) : $(headers)
 
 clean :
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index df4a19d..afa5bc5 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -12,6 +12,9 @@
 		      int data_size);
 static int push_node_left(struct ctree_root *root, struct tree_buffer *dst,
 			  struct tree_buffer *src);
+static int balance_node_right(struct ctree_root *root,
+			      struct tree_buffer *dst_buf,
+			      struct tree_buffer *src_buf);
 static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level,
 		   int slot);
 
@@ -217,15 +220,16 @@
 	int ret = 0;
 	int wret;
 	int pslot;
-	int used = 0;
-	int count;
 	int orig_slot = path->slots[level];
+	u64 orig_ptr;
 
 	if (level == 0)
 		return 0;
 
 	mid_buf = path->nodes[level];
 	mid = &mid_buf->node;
+	orig_ptr = mid->blockptrs[orig_slot];
+
 	if (level < MAX_LEVEL - 1)
 		parent_buf = path->nodes[level + 1];
 	pslot = path->slots[level + 1];
@@ -253,24 +257,26 @@
 	if (mid->header.nritems > NODEPTRS_PER_BLOCK / 4)
 		return 0;
 
-	// print_tree(root, root->node);
 	left_buf = read_node_slot(root, parent_buf, pslot - 1);
 	right_buf = read_node_slot(root, parent_buf, pslot + 1);
-	if (right_buf) {
-		right = &right_buf->node;
-		used = right->header.nritems;
-		count = 1;
-	}
+
+	/* first, try to make some room in the middle buffer */
 	if (left_buf) {
 		left = &left_buf->node;
-		used += left->header.nritems;
 		orig_slot += left->header.nritems;
-		count++;
+		wret = push_node_left(root, left_buf, mid_buf);
+		if (wret < 0)
+			ret = wret;
 	}
-	if (left_buf)
-		push_node_left(root, left_buf, mid_buf);
+
+	/*
+	 * then try to empty the right most buffer into the middle
+	 */
 	if (right_buf) {
-		push_node_left(root, mid_buf, right_buf);
+		right = &right_buf->node;
+		wret = push_node_left(root, mid_buf, right_buf);
+		if (wret < 0)
+			ret = wret;
 		if (right->header.nritems == 0) {
 			u64 blocknr = right_buf->blocknr;
 			tree_block_release(root, right_buf);
@@ -285,9 +291,29 @@
 		} else {
 			memcpy(parent->keys + pslot + 1, right->keys,
 				sizeof(struct key));
+			wret = write_tree_block(root, parent_buf);
+			if (wret)
+				ret = wret;
 		}
 	}
+	if (mid->header.nritems == 1) {
+		/*
+		 * we're not allowed to leave a node with one item in the
+		 * tree during a delete.  A deletion from lower in the tree
+		 * could try to delete the only pointer in this node.
+		 * So, pull some keys from the left.
+		 * There has to be a left pointer at this point because
+		 * otherwise we would have pulled some pointers from the
+		 * right
+		 */
+		BUG_ON(!left_buf);
+		wret = balance_node_right(root, mid_buf, left_buf);
+		if (wret < 0)
+			ret = wret;
+		BUG_ON(wret == 1);
+	}
 	if (mid->header.nritems == 0) {
+		/* we've managed to empty the middle node, drop it */
 		u64 blocknr = mid_buf->blocknr;
 		tree_block_release(root, mid_buf);
 		mid_buf = NULL;
@@ -298,11 +324,17 @@
 		wret = free_extent(root, blocknr, 1);
 		if (wret)
 			ret = wret;
-	} else
+	} else {
+		/* update the parent key to reflect our changes */
 		memcpy(parent->keys + pslot, mid->keys, sizeof(struct key));
+		wret = write_tree_block(root, parent_buf);
+		if (wret)
+			ret = wret;
+	}
 
+	/* update the path */
 	if (left_buf) {
-		if (left->header.nritems >= orig_slot) {
+		if (left->header.nritems > orig_slot) {
 			left_buf->count++; // released below
 			path->nodes[level] = left_buf;
 			path->slots[level + 1] -= 1;
@@ -314,12 +346,15 @@
 			path->slots[level] = orig_slot;
 		}
 	}
+	/* double check we haven't messed things up */
+	check_block(path, level);
+	if (orig_ptr != path->nodes[level]->node.blockptrs[path->slots[level]])
+		BUG();
 
 	if (right_buf)
 		tree_block_release(root, right_buf);
 	if (left_buf)
 		tree_block_release(root, left_buf);
-
 	return ret;
 }
 
@@ -378,6 +413,7 @@
 					goto again;
 				c = &b->node;
 				slot = p->slots[level];
+				BUG_ON(c->header.nritems == 1);
 			}
 			b = read_tree_block(root, c->blockptrs[slot]);
 		} else {
@@ -433,13 +469,7 @@
 
 /*
  * try to push data from one node into the next node left in the
- * tree.  The src node is found at specified level in the path.
- * If some bytes were pushed, return 0, otherwise return 1.
- *
- * Lower nodes/leaves in the path are not touched, higher nodes may
- * be modified to reflect the push.
- *
- * The path is altered to reflect the push.
+ * tree.
  *
  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
  * error, and > 0 if there was no room in the left hand block.
@@ -463,7 +493,8 @@
 	}
 
 	if (src_nritems < push_items)
-		push_items =src_nritems;
+		push_items = src_nritems;
+
 	memcpy(dst->keys + dst_nritems, src->keys,
 		push_items * sizeof(struct key));
 	memcpy(dst->blockptrs + dst_nritems, src->blockptrs,
@@ -488,6 +519,64 @@
 }
 
 /*
+ * try to push data from one node into the next node right in the
+ * tree.
+ *
+ * returns 0 if some ptrs were pushed, < 0 if there was some horrible
+ * error, and > 0 if there was no room in the right hand block.
+ *
+ * this will  only push up to 1/2 the contents of the left node over
+ */
+static int balance_node_right(struct ctree_root *root,
+			      struct tree_buffer *dst_buf,
+			      struct tree_buffer *src_buf)
+{
+	struct node *src = &src_buf->node;
+	struct node *dst = &dst_buf->node;
+	int push_items = 0;
+	int max_push;
+	int src_nritems;
+	int dst_nritems;
+	int ret = 0;
+	int wret;
+
+	src_nritems = src->header.nritems;
+	dst_nritems = dst->header.nritems;
+	push_items = NODEPTRS_PER_BLOCK - dst_nritems;
+	if (push_items <= 0) {
+		return 1;
+	}
+
+	max_push = src_nritems / 2 + 1;
+	/* don't try to empty the node */
+	if (max_push > src_nritems)
+		return 1;
+	if (max_push < push_items)
+		push_items = max_push;
+
+	memmove(dst->keys + push_items, dst->keys,
+		dst_nritems * sizeof(struct key));
+	memmove(dst->blockptrs + push_items, dst->blockptrs,
+		dst_nritems * sizeof(u64));
+	memcpy(dst->keys, src->keys + src_nritems - push_items,
+		push_items * sizeof(struct key));
+	memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
+		push_items * sizeof(u64));
+
+	src->header.nritems -= push_items;
+	dst->header.nritems += push_items;
+
+	wret = write_tree_block(root, src_buf);
+	if (wret < 0)
+		ret = wret;
+
+	wret = write_tree_block(root, dst_buf);
+	if (wret < 0)
+		ret = wret;
+	return ret;
+}
+
+/*
  * helper function to insert a new root level in the tree.
  * A new node is allocated, and a single item is inserted to
  * point to the existing root
diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c
new file mode 100644
index 0000000..dbd00c3
--- /dev/null
+++ b/fs/btrfs/quick-test.c
@@ -0,0 +1,165 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+
+/* for testing only */
+int next_key(int i, int max_key) {
+	return rand() % max_key;
+	//return i;
+}
+
+int main(int ac, char **av) {
+	struct key ins;
+	struct key last = { (u64)-1, 0, 0};
+	char *buf;
+	int i;
+	int num;
+	int ret;
+	int run_size = 100000;
+	int max_key =  100000000;
+	int tree_size = 0;
+	struct ctree_path path;
+	struct ctree_super_block super;
+	struct ctree_root *root;
+
+	radix_tree_init();
+
+	root = open_ctree("dbfile", &super);
+	srand(55);
+	for (i = 0; i < run_size; i++) {
+		buf = malloc(64);
+		num = next_key(i, max_key);
+		// num = i;
+		sprintf(buf, "string-%d", num);
+		if (i % 10000 == 0)
+			fprintf(stderr, "insert %d:%d\n", num, i);
+		ins.objectid = num;
+		ins.offset = 0;
+		ins.flags = 0;
+		ret = insert_item(root, &ins, buf, strlen(buf));
+		if (!ret)
+			tree_size++;
+		free(buf);
+	}
+	write_ctree_super(root, &super);
+	close_ctree(root);
+
+	root = open_ctree("dbfile", &super);
+	printf("starting search\n");
+	srand(55);
+	for (i = 0; i < run_size; i++) {
+		num = next_key(i, max_key);
+		ins.objectid = num;
+		init_path(&path);
+		if (i % 10000 == 0)
+			fprintf(stderr, "search %d:%d\n", num, i);
+		ret = search_slot(root, &ins, &path, 0);
+		if (ret) {
+			print_tree(root, root->node);
+			printf("unable to find %d\n", num);
+			exit(1);
+		}
+		release_path(root, &path);
+	}
+	write_ctree_super(root, &super);
+	close_ctree(root);
+	root = open_ctree("dbfile", &super);
+	printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
+	        node_level(root->node->node.header.flags),
+		root->node->node.header.nritems,
+		NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
+	printf("all searches good, deleting some items\n");
+	i = 0;
+	srand(55);
+	for (i = 0 ; i < run_size/4; i++) {
+		num = next_key(i, max_key);
+		ins.objectid = num;
+		init_path(&path);
+		ret = search_slot(root, &ins, &path, -1);
+		if (!ret) {
+			if (i % 10000 == 0)
+				fprintf(stderr, "del %d:%d\n", num, i);
+			ret = del_item(root, &path);
+			if (ret != 0)
+				BUG();
+			tree_size--;
+		}
+		release_path(root, &path);
+	}
+	write_ctree_super(root, &super);
+	close_ctree(root);
+	root = open_ctree("dbfile", &super);
+	srand(128);
+	for (i = 0; i < run_size; i++) {
+		buf = malloc(64);
+		num = next_key(i, max_key);
+		sprintf(buf, "string-%d", num);
+		ins.objectid = num;
+		if (i % 10000 == 0)
+			fprintf(stderr, "insert %d:%d\n", num, i);
+		ret = insert_item(root, &ins, buf, strlen(buf));
+		if (!ret)
+			tree_size++;
+		free(buf);
+	}
+	write_ctree_super(root, &super);
+	close_ctree(root);
+	root = open_ctree("dbfile", &super);
+	srand(128);
+	printf("starting search2\n");
+	for (i = 0; i < run_size; i++) {
+		num = next_key(i, max_key);
+		ins.objectid = num;
+		init_path(&path);
+		if (i % 10000 == 0)
+			fprintf(stderr, "search %d:%d\n", num, i);
+		ret = search_slot(root, &ins, &path, 0);
+		if (ret) {
+			print_tree(root, root->node);
+			printf("unable to find %d\n", num);
+			exit(1);
+		}
+		release_path(root, &path);
+	}
+	printf("starting big long delete run\n");
+	while(root->node && root->node->node.header.nritems > 0) {
+		struct leaf *leaf;
+		int slot;
+		ins.objectid = (u64)-1;
+		init_path(&path);
+		ret = search_slot(root, &ins, &path, -1);
+		if (ret == 0)
+			BUG();
+
+		leaf = &path.nodes[0]->leaf;
+		slot = path.slots[0];
+		if (slot != leaf->header.nritems)
+			BUG();
+		while(path.slots[0] > 0) {
+			path.slots[0] -= 1;
+			slot = path.slots[0];
+			leaf = &path.nodes[0]->leaf;
+
+			memcpy(&last, &leaf->items[slot].key, sizeof(last));
+			if (tree_size % 10000 == 0)
+				printf("big del %d:%d\n", tree_size, i);
+			ret = del_item(root, &path);
+			if (ret != 0) {
+				printf("del_item returned %d\n", ret);
+				BUG();
+			}
+			tree_size--;
+		}
+		release_path(root, &path);
+	}
+	printf("tree size is now %d\n", tree_size);
+	printf("map tree\n");
+	print_tree(root->extent_root, root->extent_root->node);
+	write_ctree_super(root, &super);
+	close_ctree(root);
+	return 0;
+}
diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c
index cebaf64..bbd554e 100644
--- a/fs/btrfs/random-test.c
+++ b/fs/btrfs/random-test.c
@@ -142,8 +142,98 @@
 	return -1;
 }
 
+static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix,
+		      int nr)
+{
+	struct ctree_path path;
+	struct key key;
+	unsigned long found = 0;
+	int ret;
+	int slot;
+	int *ptr;
+	int count = 0;
+
+	key.offset = 0;
+	key.flags = 0;
+	key.objectid = (unsigned long)-1;
+	while(nr-- >= 0) {
+		init_path(&path);
+		ret = search_slot(root, &key, &path, -1);
+		if (ret < 0) {
+			release_path(root, &path);
+			return ret;
+		}
+		if (ret != 0) {
+			if (path.slots[0] == 0) {
+				release_path(root, &path);
+				break;
+			}
+			path.slots[0] -= 1;
+		}
+		slot = path.slots[0];
+		found = path.nodes[0]->leaf.items[slot].key.objectid;
+		ret = del_item(root, &path);
+		count++;
+		if (ret) {
+			fprintf(stderr,
+				"failed to remove %lu from tree\n",
+				found);
+			return -1;
+		}
+		release_path(root, &path);
+		ptr = radix_tree_delete(radix, found);
+		if (!ptr)
+			goto error;
+		if (!keep_running)
+			break;
+	}
+	return 0;
+error:
+	fprintf(stderr, "failed to delete from the radix %lu\n", found);
+	return -1;
+}
+
+static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix,
+		     int count)
+{
+	int i;
+	int err;
+	int ret = 0;
+	for (i = 0; i < count; i++) {
+		ret = ins_one(root, radix);
+		if (ret) {
+			printf("fill failed\n");
+			err = ret;
+			goto out;
+		}
+		if (!keep_running)
+			break;
+	}
+out:
+	return ret;
+}
+
+static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix)
+{
+	int ret;
+	int nr = rand() % 20000;
+	static int run_nr = 0;
+
+	/* do the bulk op much less frequently */
+	if (run_nr++ % 100)
+		return 0;
+	ret = empty_tree(root, radix, nr);
+	if (ret)
+		return ret;
+	ret = fill_tree(root, radix, nr);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+
 int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) =
-{ ins_one, insert_dup, del_one, lookup_item, lookup_enoent };
+{ ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op };
 
 static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix)
 {
@@ -192,7 +282,6 @@
 	}
 	return 0;
 }
-
 void sigstopper(int ignored)
 {
 	keep_running = 0;
@@ -241,22 +330,12 @@
 			print_usage();
 		}
 	}
-	for (i = 0; i < init_fill_count; i++) {
-		ret = ins_one(root, &radix);
-		if (ret) {
-			printf("initial fill failed\n");
-			err = ret;
-			goto out;
-		}
-		if (i % 10000 == 0) {
-			printf("initial fill %d level %d count %d\n", i,
-				node_level(root->node->node.header.flags),
-				root->node->node.header.nritems);
-		}
-		if (keep_running == 0) {
-			err = 0;
-			goto out;
-		}
+	printf("initial fill\n");
+	ret = fill_tree(root, &radix, init_fill_count);
+	printf("starting run\n");
+	if (ret) {
+		err = ret;
+		goto out;
 	}
 	if (initial_only == 1) {
 		goto out;
@@ -287,6 +366,8 @@
 				err = ret;
 				goto out;
 			}
+			if (ops[op] == bulk_op)
+				break;
 			if (keep_running == 0) {
 				err = 0;
 				goto out;