Btrfs: Implement raid0 when multiple devices are present

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 263f01c..d8fce32 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -18,6 +18,7 @@
 #include <linux/sched.h>
 #include <linux/bio.h>
 #include <linux/buffer_head.h>
+#include <asm/div64.h>
 #include "ctree.h"
 #include "extent_map.h"
 #include "disk-io.h"
@@ -25,10 +26,24 @@
 #include "print-tree.h"
 #include "volumes.h"
 
-struct map_lookup {
+struct stripe {
 	struct btrfs_device *dev;
 	u64 physical;
 };
+
+struct map_lookup {
+	u64 type;
+	int io_align;
+	int io_width;
+	int stripe_len;
+	int sector_size;
+	int num_stripes;
+	struct stripe stripes[];
+};
+
+#define map_lookup_size(n) (sizeof(struct map_lookup) + \
+			    (sizeof(struct stripe) * (n)))
+
 static DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
 
@@ -592,6 +607,7 @@
 		      u64 *num_bytes, u64 type)
 {
 	u64 dev_offset;
+	struct btrfs_fs_info *info = extent_root->fs_info;
 	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
 	struct btrfs_stripe *stripes;
 	struct btrfs_device *device = NULL;
@@ -610,10 +626,18 @@
 	int looped = 0;
 	int ret;
 	int index;
+	int stripe_len = 64 * 1024;
 	struct btrfs_key key;
 
 	if (list_empty(dev_list))
 		return -ENOSPC;
+
+	if (type & BTRFS_BLOCK_GROUP_RAID0)
+		num_stripes = btrfs_super_num_devices(&info->super_copy);
+	if (type & BTRFS_BLOCK_GROUP_DATA)
+		stripe_len = 64 * 1024;
+	if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
+		stripe_len = 32 * 1024;
 again:
 	INIT_LIST_HEAD(&private_devs);
 	cur = dev_list->next;
@@ -650,9 +674,15 @@
 	if (!chunk)
 		return -ENOMEM;
 
+	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map) {
+		kfree(chunk);
+		return -ENOMEM;
+	}
+
 	stripes = &chunk->stripe;
 
-	*num_bytes = calc_size;
+	*num_bytes = calc_size * num_stripes;
 	index = 0;
 	while(index < num_stripes) {
 		BUG_ON(list_empty(&private_devs));
@@ -669,6 +699,8 @@
 		ret = btrfs_update_device(trans, device);
 		BUG_ON(ret);
 
+		map->stripes[index].dev = device;
+		map->stripes[index].physical = dev_offset;
 		btrfs_set_stack_stripe_devid(stripes + index, device->devid);
 		btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
 		physical = dev_offset;
@@ -680,12 +712,18 @@
 	key.offset = *num_bytes;
 	key.type = BTRFS_CHUNK_ITEM_KEY;
 	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
-	btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
+	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
 	btrfs_set_stack_chunk_type(chunk, type);
 	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
-	btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize);
-	btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize);
+	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
+	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
 	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
+	map->sector_size = extent_root->sectorsize;
+	map->stripe_len = stripe_len;
+	map->io_align = stripe_len;
+	map->io_width = stripe_len;
+	map->type = type;
+	map->num_stripes = num_stripes;
 
 	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
 				btrfs_chunk_item_size(num_stripes));
@@ -695,25 +733,11 @@
 	em = alloc_extent_map(GFP_NOFS);
 	if (!em)
 		return -ENOMEM;
-	map = kmalloc(sizeof(*map), GFP_NOFS);
-	if (!map) {
-		free_extent_map(em);
-		return -ENOMEM;
-	}
-
 	em->bdev = (struct block_device *)map;
 	em->start = key.objectid;
 	em->len = key.offset;
 	em->block_start = 0;
 
-	map->physical = physical;
-	map->dev = device;
-
-	if (!map->dev) {
-		kfree(map);
-		free_extent_map(em);
-		return -EIO;
-	}
 	kfree(chunk);
 
 	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
@@ -758,6 +782,9 @@
 	struct map_lookup *map;
 	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	u64 offset;
+	u64 stripe_offset;
+	u64 stripe_nr;
+	int stripe_index;
 
 
 	spin_lock(&em_tree->lock);
@@ -767,9 +794,40 @@
 	BUG_ON(em->start > logical || em->start + em->len < logical);
 	map = (struct map_lookup *)em->bdev;
 	offset = logical - em->start;
-	*phys = map->physical + offset;
-	*length = em->len - offset;
-	*dev = map->dev;
+
+	stripe_nr = offset;
+	/*
+	 * stripe_nr counts the total number of stripes we have to stride
+	 * to get to this block
+	 */
+	do_div(stripe_nr, map->stripe_len);
+
+	stripe_offset = stripe_nr * map->stripe_len;
+	BUG_ON(offset < stripe_offset);
+
+	/* stripe_offset is the offset of this block in its stripe*/
+	stripe_offset = offset - stripe_offset;
+
+	/*
+	 * after this do_div call, stripe_nr is the number of stripes
+	 * on this device we have to walk to find the data, and
+	 * stripe_index is the number of our device in the stripe array
+	 */
+	stripe_index = do_div(stripe_nr, map->num_stripes);
+
+	BUG_ON(stripe_index >= map->num_stripes);
+
+	*phys = map->stripes[stripe_index].physical + stripe_offset +
+		stripe_nr * map->stripe_len;
+
+	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+		/* we limit the length of each bio to what fits in a stripe */
+		*length = min_t(u64, em->len - offset,
+			      map->stripe_len - stripe_offset);
+	} else {
+		*length = em->len - offset;
+	}
+	*dev = map->stripes[stripe_index].dev;
 	free_extent_map(em);
 	spin_unlock(&em_tree->lock);
 	return 0;
@@ -822,7 +880,9 @@
 	u64 logical;
 	u64 length;
 	u64 devid;
+	int num_stripes;
 	int ret;
+	int i;
 
 	logical = key->objectid;
 	length = key->offset;
@@ -846,7 +906,8 @@
 	em = alloc_extent_map(GFP_NOFS);
 	if (!em)
 		return -ENOMEM;
-	map = kmalloc(sizeof(*map), GFP_NOFS);
+	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
 	if (!map) {
 		free_extent_map(em);
 		return -ENOMEM;
@@ -857,13 +918,22 @@
 	em->len = length;
 	em->block_start = 0;
 
-	map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0);
-	devid = btrfs_stripe_devid_nr(leaf, chunk, 0);
-	map->dev = btrfs_find_device(root, devid);
-	if (!map->dev) {
-		kfree(map);
-		free_extent_map(em);
-		return -EIO;
+	map->num_stripes = num_stripes;
+	map->io_width = btrfs_chunk_io_width(leaf, chunk);
+	map->io_align = btrfs_chunk_io_align(leaf, chunk);
+	map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
+	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+	map->type = btrfs_chunk_type(leaf, chunk);
+	for (i = 0; i < num_stripes; i++) {
+		map->stripes[i].physical =
+			btrfs_stripe_offset_nr(leaf, chunk, i);
+		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
+		map->stripes[i].dev = btrfs_find_device(root, devid);
+		if (!map->stripes[i].dev) {
+			kfree(map);
+			free_extent_map(em);
+			return -EIO;
+		}
 	}
 
 	spin_lock(&map_tree->map_tree.lock);