blob: 722eb455015404eaccbc5ae3c40ae2fad7a5dd45 [file] [log] [blame]
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
18#include <linux/sched.h>
19#include <linux/bio.h>
Chris Mason8a4b83c2008-03-24 15:02:07 -040020#include <linux/buffer_head.h>
Chris Masonf2d8d742008-04-21 10:03:05 -040021#include <linux/blkdev.h>
Chris Mason788f20e2008-04-28 15:29:42 -040022#include <linux/random.h>
Chris Mason593060d2008-03-25 16:50:33 -040023#include <asm/div64.h>
Chris Mason0b86a832008-03-24 15:01:56 -040024#include "ctree.h"
25#include "extent_map.h"
26#include "disk-io.h"
27#include "transaction.h"
28#include "print-tree.h"
29#include "volumes.h"
30
Chris Mason593060d2008-03-25 16:50:33 -040031struct map_lookup {
32 u64 type;
33 int io_align;
34 int io_width;
35 int stripe_len;
36 int sector_size;
37 int num_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -040038 int sub_stripes;
Chris Masoncea9e442008-04-09 16:28:12 -040039 struct btrfs_bio_stripe stripes[];
Chris Mason593060d2008-03-25 16:50:33 -040040};
41
/* number of bytes needed for a struct map_lookup carrying n stripe records */
#define map_lookup_size(n) \
	(sizeof(struct map_lookup) + (sizeof(struct btrfs_bio_stripe) * (n)))
Chris Mason593060d2008-03-25 16:50:33 -040044
/*
 * fs_uuids links every btrfs_fs_devices created by device_list_add().
 * uuid_mutex is taken by the scan/open/close/remove paths in this file
 * and is also exported through btrfs_lock_volumes()/btrfs_unlock_volumes().
 */
static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
47
Chris Masona061fc82008-05-07 11:43:44 -040048void btrfs_lock_volumes(void)
49{
50 mutex_lock(&uuid_mutex);
51}
52
53void btrfs_unlock_volumes(void)
54{
55 mutex_unlock(&uuid_mutex);
56}
57
Chris Mason8a4b83c2008-03-24 15:02:07 -040058int btrfs_cleanup_fs_uuids(void)
59{
60 struct btrfs_fs_devices *fs_devices;
61 struct list_head *uuid_cur;
62 struct list_head *devices_cur;
63 struct btrfs_device *dev;
64
65 list_for_each(uuid_cur, &fs_uuids) {
66 fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices,
67 list);
68 while(!list_empty(&fs_devices->devices)) {
69 devices_cur = fs_devices->devices.next;
70 dev = list_entry(devices_cur, struct btrfs_device,
71 dev_list);
Chris Mason8a4b83c2008-03-24 15:02:07 -040072 if (dev->bdev) {
Chris Mason8a4b83c2008-03-24 15:02:07 -040073 close_bdev_excl(dev->bdev);
Chris Masona0af4692008-05-13 16:03:06 -040074 fs_devices->open_devices--;
Chris Mason8a4b83c2008-03-24 15:02:07 -040075 }
76 list_del(&dev->dev_list);
Chris Masondfe25022008-05-13 13:46:40 -040077 kfree(dev->name);
Chris Mason8a4b83c2008-03-24 15:02:07 -040078 kfree(dev);
79 }
80 }
81 return 0;
82}
83
Chris Masona4437552008-04-18 10:29:38 -040084static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
85 u8 *uuid)
Chris Mason8a4b83c2008-03-24 15:02:07 -040086{
87 struct btrfs_device *dev;
88 struct list_head *cur;
89
90 list_for_each(cur, head) {
91 dev = list_entry(cur, struct btrfs_device, dev_list);
Chris Masona4437552008-04-18 10:29:38 -040092 if (dev->devid == devid &&
Chris Mason8f18cf12008-04-25 16:53:30 -040093 (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
Chris Mason8a4b83c2008-03-24 15:02:07 -040094 return dev;
Chris Masona4437552008-04-18 10:29:38 -040095 }
Chris Mason8a4b83c2008-03-24 15:02:07 -040096 }
97 return NULL;
98}
99
100static struct btrfs_fs_devices *find_fsid(u8 *fsid)
101{
102 struct list_head *cur;
103 struct btrfs_fs_devices *fs_devices;
104
105 list_for_each(cur, &fs_uuids) {
106 fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
107 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
108 return fs_devices;
109 }
110 return NULL;
111}
112
113static int device_list_add(const char *path,
114 struct btrfs_super_block *disk_super,
115 u64 devid, struct btrfs_fs_devices **fs_devices_ret)
116{
117 struct btrfs_device *device;
118 struct btrfs_fs_devices *fs_devices;
119 u64 found_transid = btrfs_super_generation(disk_super);
120
121 fs_devices = find_fsid(disk_super->fsid);
122 if (!fs_devices) {
Chris Mason515dc322008-05-16 13:30:15 -0400123 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400124 if (!fs_devices)
125 return -ENOMEM;
126 INIT_LIST_HEAD(&fs_devices->devices);
Chris Masonb3075712008-04-22 09:22:07 -0400127 INIT_LIST_HEAD(&fs_devices->alloc_list);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400128 list_add(&fs_devices->list, &fs_uuids);
129 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
130 fs_devices->latest_devid = devid;
131 fs_devices->latest_trans = found_transid;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400132 device = NULL;
133 } else {
Chris Masona4437552008-04-18 10:29:38 -0400134 device = __find_device(&fs_devices->devices, devid,
135 disk_super->dev_item.uuid);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400136 }
137 if (!device) {
138 device = kzalloc(sizeof(*device), GFP_NOFS);
139 if (!device) {
140 /* we can safely leave the fs_devices entry around */
141 return -ENOMEM;
142 }
143 device->devid = devid;
Chris Masona4437552008-04-18 10:29:38 -0400144 memcpy(device->uuid, disk_super->dev_item.uuid,
145 BTRFS_UUID_SIZE);
Chris Masonf2984462008-04-10 16:19:33 -0400146 device->barriers = 1;
Chris Masonb248a412008-04-14 09:48:18 -0400147 spin_lock_init(&device->io_lock);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400148 device->name = kstrdup(path, GFP_NOFS);
149 if (!device->name) {
150 kfree(device);
151 return -ENOMEM;
152 }
153 list_add(&device->dev_list, &fs_devices->devices);
Chris Masonb3075712008-04-22 09:22:07 -0400154 list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400155 fs_devices->num_devices++;
156 }
157
158 if (found_transid > fs_devices->latest_trans) {
159 fs_devices->latest_devid = devid;
160 fs_devices->latest_trans = found_transid;
161 }
Chris Mason8a4b83c2008-03-24 15:02:07 -0400162 *fs_devices_ret = fs_devices;
163 return 0;
164}
165
Chris Masondfe25022008-05-13 13:46:40 -0400166int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
167{
168 struct list_head *head = &fs_devices->devices;
169 struct list_head *cur;
170 struct btrfs_device *device;
171
172 mutex_lock(&uuid_mutex);
173again:
174 list_for_each(cur, head) {
175 device = list_entry(cur, struct btrfs_device, dev_list);
176 if (!device->in_fs_metadata) {
Chris Masona0af4692008-05-13 16:03:06 -0400177 if (device->bdev) {
Chris Masondfe25022008-05-13 13:46:40 -0400178 close_bdev_excl(device->bdev);
Chris Masona0af4692008-05-13 16:03:06 -0400179 fs_devices->open_devices--;
180 }
Chris Masondfe25022008-05-13 13:46:40 -0400181 list_del(&device->dev_list);
182 list_del(&device->dev_alloc_list);
183 fs_devices->num_devices--;
184 kfree(device->name);
185 kfree(device);
186 goto again;
187 }
188 }
189 mutex_unlock(&uuid_mutex);
190 return 0;
191}
Chris Masona0af4692008-05-13 16:03:06 -0400192
Chris Mason8a4b83c2008-03-24 15:02:07 -0400193int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
194{
195 struct list_head *head = &fs_devices->devices;
196 struct list_head *cur;
197 struct btrfs_device *device;
198
199 mutex_lock(&uuid_mutex);
200 list_for_each(cur, head) {
201 device = list_entry(cur, struct btrfs_device, dev_list);
202 if (device->bdev) {
203 close_bdev_excl(device->bdev);
Chris Masona0af4692008-05-13 16:03:06 -0400204 fs_devices->open_devices--;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400205 }
206 device->bdev = NULL;
Chris Masondfe25022008-05-13 13:46:40 -0400207 device->in_fs_metadata = 0;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400208 }
Chris Masona0af4692008-05-13 16:03:06 -0400209 fs_devices->mounted = 0;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400210 mutex_unlock(&uuid_mutex);
211 return 0;
212}
213
214int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
215 int flags, void *holder)
216{
217 struct block_device *bdev;
218 struct list_head *head = &fs_devices->devices;
219 struct list_head *cur;
220 struct btrfs_device *device;
Chris Masona0af4692008-05-13 16:03:06 -0400221 struct block_device *latest_bdev = NULL;
222 struct buffer_head *bh;
223 struct btrfs_super_block *disk_super;
224 u64 latest_devid = 0;
225 u64 latest_transid = 0;
226 u64 transid;
227 u64 devid;
228 int ret = 0;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400229
230 mutex_lock(&uuid_mutex);
Chris Masona0af4692008-05-13 16:03:06 -0400231 if (fs_devices->mounted)
232 goto out;
233
Chris Mason8a4b83c2008-03-24 15:02:07 -0400234 list_for_each(cur, head) {
235 device = list_entry(cur, struct btrfs_device, dev_list);
Chris Masonc1c4d912008-05-08 15:05:58 -0400236 if (device->bdev)
237 continue;
238
Chris Masondfe25022008-05-13 13:46:40 -0400239 if (!device->name)
240 continue;
241
Chris Mason8a4b83c2008-03-24 15:02:07 -0400242 bdev = open_bdev_excl(device->name, flags, holder);
Chris Masone17cade2008-04-15 15:41:47 -0400243
Chris Mason8a4b83c2008-03-24 15:02:07 -0400244 if (IS_ERR(bdev)) {
245 printk("open %s failed\n", device->name);
Chris Masona0af4692008-05-13 16:03:06 -0400246 goto error;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400247 }
Chris Masona061fc82008-05-07 11:43:44 -0400248 set_blocksize(bdev, 4096);
Chris Masona0af4692008-05-13 16:03:06 -0400249
250 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
251 if (!bh)
252 goto error_close;
253
254 disk_super = (struct btrfs_super_block *)bh->b_data;
255 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
256 sizeof(disk_super->magic)))
257 goto error_brelse;
258
259 devid = le64_to_cpu(disk_super->dev_item.devid);
260 if (devid != device->devid)
261 goto error_brelse;
262
263 transid = btrfs_super_generation(disk_super);
Chris Mason6af5ac32008-05-16 13:14:57 -0400264 if (!latest_transid || transid > latest_transid) {
Chris Masona0af4692008-05-13 16:03:06 -0400265 latest_devid = devid;
266 latest_transid = transid;
267 latest_bdev = bdev;
268 }
269
Chris Mason8a4b83c2008-03-24 15:02:07 -0400270 device->bdev = bdev;
Chris Masondfe25022008-05-13 13:46:40 -0400271 device->in_fs_metadata = 0;
Chris Masona0af4692008-05-13 16:03:06 -0400272 fs_devices->open_devices++;
273 continue;
Chris Masona061fc82008-05-07 11:43:44 -0400274
Chris Masona0af4692008-05-13 16:03:06 -0400275error_brelse:
276 brelse(bh);
277error_close:
278 close_bdev_excl(bdev);
279error:
280 continue;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400281 }
Chris Masona0af4692008-05-13 16:03:06 -0400282 if (fs_devices->open_devices == 0) {
283 ret = -EIO;
284 goto out;
285 }
286 fs_devices->mounted = 1;
287 fs_devices->latest_bdev = latest_bdev;
288 fs_devices->latest_devid = latest_devid;
289 fs_devices->latest_trans = latest_transid;
290out:
Chris Mason8a4b83c2008-03-24 15:02:07 -0400291 mutex_unlock(&uuid_mutex);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400292 return ret;
293}
294
295int btrfs_scan_one_device(const char *path, int flags, void *holder,
296 struct btrfs_fs_devices **fs_devices_ret)
297{
298 struct btrfs_super_block *disk_super;
299 struct block_device *bdev;
300 struct buffer_head *bh;
301 int ret;
302 u64 devid;
Chris Masonf2984462008-04-10 16:19:33 -0400303 u64 transid;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400304
305 mutex_lock(&uuid_mutex);
306
Chris Mason8a4b83c2008-03-24 15:02:07 -0400307 bdev = open_bdev_excl(path, flags, holder);
308
309 if (IS_ERR(bdev)) {
Chris Mason8a4b83c2008-03-24 15:02:07 -0400310 ret = PTR_ERR(bdev);
311 goto error;
312 }
313
314 ret = set_blocksize(bdev, 4096);
315 if (ret)
316 goto error_close;
317 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
318 if (!bh) {
319 ret = -EIO;
320 goto error_close;
321 }
322 disk_super = (struct btrfs_super_block *)bh->b_data;
323 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
324 sizeof(disk_super->magic))) {
Yane58ca022008-04-01 11:21:34 -0400325 ret = -EINVAL;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400326 goto error_brelse;
327 }
328 devid = le64_to_cpu(disk_super->dev_item.devid);
Chris Masonf2984462008-04-10 16:19:33 -0400329 transid = btrfs_super_generation(disk_super);
Chris Mason7ae9c092008-04-18 10:29:49 -0400330 if (disk_super->label[0])
331 printk("device label %s ", disk_super->label);
332 else {
333 /* FIXME, make a readl uuid parser */
334 printk("device fsid %llx-%llx ",
335 *(unsigned long long *)disk_super->fsid,
336 *(unsigned long long *)(disk_super->fsid + 8));
337 }
338 printk("devid %Lu transid %Lu %s\n", devid, transid, path);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400339 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
340
341error_brelse:
342 brelse(bh);
343error_close:
344 close_bdev_excl(bdev);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400345error:
346 mutex_unlock(&uuid_mutex);
347 return ret;
348}
Chris Mason0b86a832008-03-24 15:01:56 -0400349
350/*
351 * this uses a pretty simple search, the expectation is that it is
352 * called very infrequently and that a given device has a small number
353 * of extents
354 */
355static int find_free_dev_extent(struct btrfs_trans_handle *trans,
356 struct btrfs_device *device,
357 struct btrfs_path *path,
358 u64 num_bytes, u64 *start)
359{
360 struct btrfs_key key;
361 struct btrfs_root *root = device->dev_root;
362 struct btrfs_dev_extent *dev_extent = NULL;
363 u64 hole_size = 0;
364 u64 last_byte = 0;
365 u64 search_start = 0;
366 u64 search_end = device->total_bytes;
367 int ret;
368 int slot = 0;
369 int start_found;
370 struct extent_buffer *l;
371
372 start_found = 0;
373 path->reada = 2;
374
375 /* FIXME use last free of some kind */
376
Chris Mason8a4b83c2008-03-24 15:02:07 -0400377 /* we don't want to overwrite the superblock on the drive,
378 * so we make sure to start at an offset of at least 1MB
379 */
380 search_start = max((u64)1024 * 1024, search_start);
Chris Mason8f18cf12008-04-25 16:53:30 -0400381
382 if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
383 search_start = max(root->fs_info->alloc_start, search_start);
384
Chris Mason0b86a832008-03-24 15:01:56 -0400385 key.objectid = device->devid;
386 key.offset = search_start;
387 key.type = BTRFS_DEV_EXTENT_KEY;
388 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
389 if (ret < 0)
390 goto error;
391 ret = btrfs_previous_item(root, path, 0, key.type);
392 if (ret < 0)
393 goto error;
394 l = path->nodes[0];
395 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
396 while (1) {
397 l = path->nodes[0];
398 slot = path->slots[0];
399 if (slot >= btrfs_header_nritems(l)) {
400 ret = btrfs_next_leaf(root, path);
401 if (ret == 0)
402 continue;
403 if (ret < 0)
404 goto error;
405no_more_items:
406 if (!start_found) {
407 if (search_start >= search_end) {
408 ret = -ENOSPC;
409 goto error;
410 }
411 *start = search_start;
412 start_found = 1;
413 goto check_pending;
414 }
415 *start = last_byte > search_start ?
416 last_byte : search_start;
417 if (search_end <= *start) {
418 ret = -ENOSPC;
419 goto error;
420 }
421 goto check_pending;
422 }
423 btrfs_item_key_to_cpu(l, &key, slot);
424
425 if (key.objectid < device->devid)
426 goto next;
427
428 if (key.objectid > device->devid)
429 goto no_more_items;
430
431 if (key.offset >= search_start && key.offset > last_byte &&
432 start_found) {
433 if (last_byte < search_start)
434 last_byte = search_start;
435 hole_size = key.offset - last_byte;
436 if (key.offset > last_byte &&
437 hole_size >= num_bytes) {
438 *start = last_byte;
439 goto check_pending;
440 }
441 }
442 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
443 goto next;
444 }
445
446 start_found = 1;
447 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
448 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
449next:
450 path->slots[0]++;
451 cond_resched();
452 }
453check_pending:
454 /* we have to make sure we didn't find an extent that has already
455 * been allocated by the map tree or the original allocation
456 */
457 btrfs_release_path(root, path);
458 BUG_ON(*start < search_start);
459
Chris Mason6324fbf2008-03-24 15:01:59 -0400460 if (*start + num_bytes > search_end) {
Chris Mason0b86a832008-03-24 15:01:56 -0400461 ret = -ENOSPC;
462 goto error;
463 }
464 /* check for pending inserts here */
465 return 0;
466
467error:
468 btrfs_release_path(root, path);
469 return ret;
470}
471
Chris Mason8f18cf12008-04-25 16:53:30 -0400472int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
473 struct btrfs_device *device,
474 u64 start)
475{
476 int ret;
477 struct btrfs_path *path;
478 struct btrfs_root *root = device->dev_root;
479 struct btrfs_key key;
Chris Masona061fc82008-05-07 11:43:44 -0400480 struct btrfs_key found_key;
481 struct extent_buffer *leaf = NULL;
482 struct btrfs_dev_extent *extent = NULL;
Chris Mason8f18cf12008-04-25 16:53:30 -0400483
484 path = btrfs_alloc_path();
485 if (!path)
486 return -ENOMEM;
487
488 key.objectid = device->devid;
489 key.offset = start;
490 key.type = BTRFS_DEV_EXTENT_KEY;
491
492 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
Chris Masona061fc82008-05-07 11:43:44 -0400493 if (ret > 0) {
494 ret = btrfs_previous_item(root, path, key.objectid,
495 BTRFS_DEV_EXTENT_KEY);
496 BUG_ON(ret);
497 leaf = path->nodes[0];
498 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
499 extent = btrfs_item_ptr(leaf, path->slots[0],
500 struct btrfs_dev_extent);
501 BUG_ON(found_key.offset > start || found_key.offset +
502 btrfs_dev_extent_length(leaf, extent) < start);
503 ret = 0;
504 } else if (ret == 0) {
505 leaf = path->nodes[0];
506 extent = btrfs_item_ptr(leaf, path->slots[0],
507 struct btrfs_dev_extent);
508 }
Chris Mason8f18cf12008-04-25 16:53:30 -0400509 BUG_ON(ret);
510
Chris Masondfe25022008-05-13 13:46:40 -0400511 if (device->bytes_used > 0)
512 device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
Chris Mason8f18cf12008-04-25 16:53:30 -0400513 ret = btrfs_del_item(trans, root, path);
514 BUG_ON(ret);
515
516 btrfs_free_path(path);
517 return ret;
518}
519
Chris Mason0b86a832008-03-24 15:01:56 -0400520int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
521 struct btrfs_device *device,
Chris Masone17cade2008-04-15 15:41:47 -0400522 u64 chunk_tree, u64 chunk_objectid,
523 u64 chunk_offset,
524 u64 num_bytes, u64 *start)
Chris Mason0b86a832008-03-24 15:01:56 -0400525{
526 int ret;
527 struct btrfs_path *path;
528 struct btrfs_root *root = device->dev_root;
529 struct btrfs_dev_extent *extent;
530 struct extent_buffer *leaf;
531 struct btrfs_key key;
532
Chris Masondfe25022008-05-13 13:46:40 -0400533 WARN_ON(!device->in_fs_metadata);
Chris Mason0b86a832008-03-24 15:01:56 -0400534 path = btrfs_alloc_path();
535 if (!path)
536 return -ENOMEM;
537
538 ret = find_free_dev_extent(trans, device, path, num_bytes, start);
Chris Mason6324fbf2008-03-24 15:01:59 -0400539 if (ret) {
Chris Mason0b86a832008-03-24 15:01:56 -0400540 goto err;
Chris Mason6324fbf2008-03-24 15:01:59 -0400541 }
Chris Mason0b86a832008-03-24 15:01:56 -0400542
543 key.objectid = device->devid;
544 key.offset = *start;
545 key.type = BTRFS_DEV_EXTENT_KEY;
546 ret = btrfs_insert_empty_item(trans, root, path, &key,
547 sizeof(*extent));
548 BUG_ON(ret);
549
550 leaf = path->nodes[0];
551 extent = btrfs_item_ptr(leaf, path->slots[0],
552 struct btrfs_dev_extent);
Chris Masone17cade2008-04-15 15:41:47 -0400553 btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
554 btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
555 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
556
557 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
558 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
559 BTRFS_UUID_SIZE);
560
Chris Mason0b86a832008-03-24 15:01:56 -0400561 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
562 btrfs_mark_buffer_dirty(leaf);
563err:
564 btrfs_free_path(path);
565 return ret;
566}
567
Chris Masone17cade2008-04-15 15:41:47 -0400568static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
Chris Mason0b86a832008-03-24 15:01:56 -0400569{
570 struct btrfs_path *path;
571 int ret;
572 struct btrfs_key key;
Chris Masone17cade2008-04-15 15:41:47 -0400573 struct btrfs_chunk *chunk;
Chris Mason0b86a832008-03-24 15:01:56 -0400574 struct btrfs_key found_key;
575
576 path = btrfs_alloc_path();
577 BUG_ON(!path);
578
Chris Masone17cade2008-04-15 15:41:47 -0400579 key.objectid = objectid;
Chris Mason0b86a832008-03-24 15:01:56 -0400580 key.offset = (u64)-1;
581 key.type = BTRFS_CHUNK_ITEM_KEY;
582
583 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
584 if (ret < 0)
585 goto error;
586
587 BUG_ON(ret == 0);
588
589 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
590 if (ret) {
Chris Masone17cade2008-04-15 15:41:47 -0400591 *offset = 0;
Chris Mason0b86a832008-03-24 15:01:56 -0400592 } else {
593 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
594 path->slots[0]);
Chris Masone17cade2008-04-15 15:41:47 -0400595 if (found_key.objectid != objectid)
596 *offset = 0;
597 else {
598 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
599 struct btrfs_chunk);
600 *offset = found_key.offset +
601 btrfs_chunk_length(path->nodes[0], chunk);
602 }
Chris Mason0b86a832008-03-24 15:01:56 -0400603 }
604 ret = 0;
605error:
606 btrfs_free_path(path);
607 return ret;
608}
609
Chris Mason0b86a832008-03-24 15:01:56 -0400610static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
611 u64 *objectid)
612{
613 int ret;
614 struct btrfs_key key;
615 struct btrfs_key found_key;
616
617 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
618 key.type = BTRFS_DEV_ITEM_KEY;
619 key.offset = (u64)-1;
620
621 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
622 if (ret < 0)
623 goto error;
624
625 BUG_ON(ret == 0);
626
627 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
628 BTRFS_DEV_ITEM_KEY);
629 if (ret) {
630 *objectid = 1;
631 } else {
632 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
633 path->slots[0]);
634 *objectid = found_key.offset + 1;
635 }
636 ret = 0;
637error:
638 btrfs_release_path(root, path);
639 return ret;
640}
641
642/*
643 * the device information is stored in the chunk root
644 * the btrfs_device struct should be fully filled in
645 */
646int btrfs_add_device(struct btrfs_trans_handle *trans,
647 struct btrfs_root *root,
648 struct btrfs_device *device)
649{
650 int ret;
651 struct btrfs_path *path;
652 struct btrfs_dev_item *dev_item;
653 struct extent_buffer *leaf;
654 struct btrfs_key key;
655 unsigned long ptr;
Chris Mason006a58a2008-05-02 14:43:15 -0400656 u64 free_devid = 0;
Chris Mason0b86a832008-03-24 15:01:56 -0400657
658 root = root->fs_info->chunk_root;
659
660 path = btrfs_alloc_path();
661 if (!path)
662 return -ENOMEM;
663
664 ret = find_next_devid(root, path, &free_devid);
665 if (ret)
666 goto out;
667
668 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
669 key.type = BTRFS_DEV_ITEM_KEY;
670 key.offset = free_devid;
671
672 ret = btrfs_insert_empty_item(trans, root, path, &key,
Chris Mason0d81ba52008-03-24 15:02:07 -0400673 sizeof(*dev_item));
Chris Mason0b86a832008-03-24 15:01:56 -0400674 if (ret)
675 goto out;
676
677 leaf = path->nodes[0];
678 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
679
Chris Mason8a4b83c2008-03-24 15:02:07 -0400680 device->devid = free_devid;
Chris Mason0b86a832008-03-24 15:01:56 -0400681 btrfs_set_device_id(leaf, dev_item, device->devid);
682 btrfs_set_device_type(leaf, dev_item, device->type);
683 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
684 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
685 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
Chris Mason0b86a832008-03-24 15:01:56 -0400686 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
687 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
Chris Masone17cade2008-04-15 15:41:47 -0400688 btrfs_set_device_group(leaf, dev_item, 0);
689 btrfs_set_device_seek_speed(leaf, dev_item, 0);
690 btrfs_set_device_bandwidth(leaf, dev_item, 0);
Chris Mason0b86a832008-03-24 15:01:56 -0400691
Chris Mason0b86a832008-03-24 15:01:56 -0400692 ptr = (unsigned long)btrfs_device_uuid(dev_item);
Chris Masone17cade2008-04-15 15:41:47 -0400693 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
Chris Mason0b86a832008-03-24 15:01:56 -0400694 btrfs_mark_buffer_dirty(leaf);
695 ret = 0;
696
697out:
698 btrfs_free_path(path);
699 return ret;
700}
Chris Mason8f18cf12008-04-25 16:53:30 -0400701
Chris Masona061fc82008-05-07 11:43:44 -0400702static int btrfs_rm_dev_item(struct btrfs_root *root,
703 struct btrfs_device *device)
704{
705 int ret;
706 struct btrfs_path *path;
707 struct block_device *bdev = device->bdev;
708 struct btrfs_device *next_dev;
709 struct btrfs_key key;
710 u64 total_bytes;
711 struct btrfs_fs_devices *fs_devices;
712 struct btrfs_trans_handle *trans;
713
714 root = root->fs_info->chunk_root;
715
716 path = btrfs_alloc_path();
717 if (!path)
718 return -ENOMEM;
719
720 trans = btrfs_start_transaction(root, 1);
721 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
722 key.type = BTRFS_DEV_ITEM_KEY;
723 key.offset = device->devid;
724
725 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
726 if (ret < 0)
727 goto out;
728
729 if (ret > 0) {
730 ret = -ENOENT;
731 goto out;
732 }
733
734 ret = btrfs_del_item(trans, root, path);
735 if (ret)
736 goto out;
737
738 /*
739 * at this point, the device is zero sized. We want to
740 * remove it from the devices list and zero out the old super
741 */
742 list_del_init(&device->dev_list);
743 list_del_init(&device->dev_alloc_list);
744 fs_devices = root->fs_info->fs_devices;
745
746 next_dev = list_entry(fs_devices->devices.next, struct btrfs_device,
747 dev_list);
Chris Masona061fc82008-05-07 11:43:44 -0400748 if (bdev == root->fs_info->sb->s_bdev)
749 root->fs_info->sb->s_bdev = next_dev->bdev;
750 if (bdev == fs_devices->latest_bdev)
751 fs_devices->latest_bdev = next_dev->bdev;
752
Chris Masona061fc82008-05-07 11:43:44 -0400753 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
754 btrfs_set_super_num_devices(&root->fs_info->super_copy,
755 total_bytes - 1);
756out:
757 btrfs_free_path(path);
758 btrfs_commit_transaction(trans, root);
759 return ret;
760}
761
762int btrfs_rm_device(struct btrfs_root *root, char *device_path)
763{
764 struct btrfs_device *device;
765 struct block_device *bdev;
Chris Masondfe25022008-05-13 13:46:40 -0400766 struct buffer_head *bh = NULL;
Chris Masona061fc82008-05-07 11:43:44 -0400767 struct btrfs_super_block *disk_super;
768 u64 all_avail;
769 u64 devid;
770 int ret = 0;
771
772 mutex_lock(&root->fs_info->fs_mutex);
773 mutex_lock(&uuid_mutex);
774
775 all_avail = root->fs_info->avail_data_alloc_bits |
776 root->fs_info->avail_system_alloc_bits |
777 root->fs_info->avail_metadata_alloc_bits;
778
779 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
Chris Masondfe25022008-05-13 13:46:40 -0400780 btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) {
Chris Masona061fc82008-05-07 11:43:44 -0400781 printk("btrfs: unable to go below four devices on raid10\n");
782 ret = -EINVAL;
783 goto out;
784 }
785
786 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
Chris Masondfe25022008-05-13 13:46:40 -0400787 btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) {
Chris Masona061fc82008-05-07 11:43:44 -0400788 printk("btrfs: unable to go below two devices on raid1\n");
789 ret = -EINVAL;
790 goto out;
791 }
792
Chris Masondfe25022008-05-13 13:46:40 -0400793 if (strcmp(device_path, "missing") == 0) {
794 struct list_head *cur;
795 struct list_head *devices;
796 struct btrfs_device *tmp;
Chris Masona061fc82008-05-07 11:43:44 -0400797
Chris Masondfe25022008-05-13 13:46:40 -0400798 device = NULL;
799 devices = &root->fs_info->fs_devices->devices;
800 list_for_each(cur, devices) {
801 tmp = list_entry(cur, struct btrfs_device, dev_list);
802 if (tmp->in_fs_metadata && !tmp->bdev) {
803 device = tmp;
804 break;
805 }
806 }
807 bdev = NULL;
808 bh = NULL;
809 disk_super = NULL;
810 if (!device) {
811 printk("btrfs: no missing devices found to remove\n");
812 goto out;
813 }
Chris Masona061fc82008-05-07 11:43:44 -0400814
Chris Masondfe25022008-05-13 13:46:40 -0400815 } else {
816 bdev = open_bdev_excl(device_path, 0,
817 root->fs_info->bdev_holder);
818 if (IS_ERR(bdev)) {
819 ret = PTR_ERR(bdev);
820 goto out;
821 }
822
823 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
824 if (!bh) {
825 ret = -EIO;
826 goto error_close;
827 }
828 disk_super = (struct btrfs_super_block *)bh->b_data;
829 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
830 sizeof(disk_super->magic))) {
831 ret = -ENOENT;
832 goto error_brelse;
833 }
834 if (memcmp(disk_super->fsid, root->fs_info->fsid,
835 BTRFS_FSID_SIZE)) {
836 ret = -ENOENT;
837 goto error_brelse;
838 }
839 devid = le64_to_cpu(disk_super->dev_item.devid);
840 device = btrfs_find_device(root, devid, NULL);
841 if (!device) {
842 ret = -ENOENT;
843 goto error_brelse;
844 }
845
846 }
Chris Masona061fc82008-05-07 11:43:44 -0400847 root->fs_info->fs_devices->num_devices--;
Chris Mason0ef3e662008-05-24 14:04:53 -0400848 root->fs_info->fs_devices->open_devices--;
Chris Masona061fc82008-05-07 11:43:44 -0400849
850 ret = btrfs_shrink_device(device, 0);
851 if (ret)
852 goto error_brelse;
853
854
855 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
856 if (ret)
857 goto error_brelse;
858
Chris Masondfe25022008-05-13 13:46:40 -0400859 if (bh) {
860 /* make sure this device isn't detected as part of
861 * the FS anymore
862 */
863 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
864 set_buffer_dirty(bh);
865 sync_dirty_buffer(bh);
Chris Masona061fc82008-05-07 11:43:44 -0400866
Chris Masondfe25022008-05-13 13:46:40 -0400867 brelse(bh);
868 }
Chris Masona061fc82008-05-07 11:43:44 -0400869
Chris Masondfe25022008-05-13 13:46:40 -0400870 if (device->bdev) {
871 /* one close for the device struct or super_block */
872 close_bdev_excl(device->bdev);
873 }
874 if (bdev) {
875 /* one close for us */
876 close_bdev_excl(bdev);
877 }
Chris Masona061fc82008-05-07 11:43:44 -0400878 kfree(device->name);
879 kfree(device);
880 ret = 0;
881 goto out;
882
883error_brelse:
884 brelse(bh);
885error_close:
Chris Masondfe25022008-05-13 13:46:40 -0400886 if (bdev)
887 close_bdev_excl(bdev);
Chris Masona061fc82008-05-07 11:43:44 -0400888out:
889 mutex_unlock(&uuid_mutex);
890 mutex_unlock(&root->fs_info->fs_mutex);
891 return ret;
892}
893
Chris Mason788f20e2008-04-28 15:29:42 -0400894int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
895{
896 struct btrfs_trans_handle *trans;
897 struct btrfs_device *device;
898 struct block_device *bdev;
899 struct list_head *cur;
900 struct list_head *devices;
901 u64 total_bytes;
902 int ret = 0;
903
904
905 bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder);
906 if (!bdev) {
907 return -EIO;
908 }
909 mutex_lock(&root->fs_info->fs_mutex);
910 trans = btrfs_start_transaction(root, 1);
911 devices = &root->fs_info->fs_devices->devices;
912 list_for_each(cur, devices) {
913 device = list_entry(cur, struct btrfs_device, dev_list);
914 if (device->bdev == bdev) {
915 ret = -EEXIST;
916 goto out;
917 }
918 }
919
920 device = kzalloc(sizeof(*device), GFP_NOFS);
921 if (!device) {
922 /* we can safely leave the fs_devices entry around */
923 ret = -ENOMEM;
924 goto out_close_bdev;
925 }
926
927 device->barriers = 1;
928 generate_random_uuid(device->uuid);
929 spin_lock_init(&device->io_lock);
930 device->name = kstrdup(device_path, GFP_NOFS);
931 if (!device->name) {
932 kfree(device);
933 goto out_close_bdev;
934 }
935 device->io_width = root->sectorsize;
936 device->io_align = root->sectorsize;
937 device->sector_size = root->sectorsize;
938 device->total_bytes = i_size_read(bdev->bd_inode);
939 device->dev_root = root->fs_info->dev_root;
940 device->bdev = bdev;
Chris Masondfe25022008-05-13 13:46:40 -0400941 device->in_fs_metadata = 1;
Chris Mason788f20e2008-04-28 15:29:42 -0400942
943 ret = btrfs_add_device(trans, root, device);
944 if (ret)
945 goto out_close_bdev;
946
947 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
948 btrfs_set_super_total_bytes(&root->fs_info->super_copy,
949 total_bytes + device->total_bytes);
950
951 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
952 btrfs_set_super_num_devices(&root->fs_info->super_copy,
953 total_bytes + 1);
954
955 list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
956 list_add(&device->dev_alloc_list,
957 &root->fs_info->fs_devices->alloc_list);
958 root->fs_info->fs_devices->num_devices++;
Chris Masona0af4692008-05-13 16:03:06 -0400959 root->fs_info->fs_devices->open_devices++;
Chris Mason788f20e2008-04-28 15:29:42 -0400960out:
961 btrfs_end_transaction(trans, root);
962 mutex_unlock(&root->fs_info->fs_mutex);
963 return ret;
964
965out_close_bdev:
966 close_bdev_excl(bdev);
967 goto out;
968}
969
Chris Mason0b86a832008-03-24 15:01:56 -0400970int btrfs_update_device(struct btrfs_trans_handle *trans,
971 struct btrfs_device *device)
972{
973 int ret;
974 struct btrfs_path *path;
975 struct btrfs_root *root;
976 struct btrfs_dev_item *dev_item;
977 struct extent_buffer *leaf;
978 struct btrfs_key key;
979
980 root = device->dev_root->fs_info->chunk_root;
981
982 path = btrfs_alloc_path();
983 if (!path)
984 return -ENOMEM;
985
986 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
987 key.type = BTRFS_DEV_ITEM_KEY;
988 key.offset = device->devid;
989
990 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
991 if (ret < 0)
992 goto out;
993
994 if (ret > 0) {
995 ret = -ENOENT;
996 goto out;
997 }
998
999 leaf = path->nodes[0];
1000 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1001
1002 btrfs_set_device_id(leaf, dev_item, device->devid);
1003 btrfs_set_device_type(leaf, dev_item, device->type);
1004 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1005 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1006 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
Chris Mason0b86a832008-03-24 15:01:56 -04001007 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
1008 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1009 btrfs_mark_buffer_dirty(leaf);
1010
1011out:
1012 btrfs_free_path(path);
1013 return ret;
1014}
1015
Chris Mason8f18cf12008-04-25 16:53:30 -04001016int btrfs_grow_device(struct btrfs_trans_handle *trans,
1017 struct btrfs_device *device, u64 new_size)
1018{
1019 struct btrfs_super_block *super_copy =
1020 &device->dev_root->fs_info->super_copy;
1021 u64 old_total = btrfs_super_total_bytes(super_copy);
1022 u64 diff = new_size - device->total_bytes;
1023
1024 btrfs_set_super_total_bytes(super_copy, old_total + diff);
1025 return btrfs_update_device(trans, device);
1026}
1027
1028static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1029 struct btrfs_root *root,
1030 u64 chunk_tree, u64 chunk_objectid,
1031 u64 chunk_offset)
1032{
1033 int ret;
1034 struct btrfs_path *path;
1035 struct btrfs_key key;
1036
1037 root = root->fs_info->chunk_root;
1038 path = btrfs_alloc_path();
1039 if (!path)
1040 return -ENOMEM;
1041
1042 key.objectid = chunk_objectid;
1043 key.offset = chunk_offset;
1044 key.type = BTRFS_CHUNK_ITEM_KEY;
1045
1046 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1047 BUG_ON(ret);
1048
1049 ret = btrfs_del_item(trans, root, path);
1050 BUG_ON(ret);
1051
1052 btrfs_free_path(path);
1053 return 0;
1054}
1055
1056int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
1057 chunk_offset)
1058{
1059 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1060 struct btrfs_disk_key *disk_key;
1061 struct btrfs_chunk *chunk;
1062 u8 *ptr;
1063 int ret = 0;
1064 u32 num_stripes;
1065 u32 array_size;
1066 u32 len = 0;
1067 u32 cur;
1068 struct btrfs_key key;
1069
1070 array_size = btrfs_super_sys_array_size(super_copy);
1071
1072 ptr = super_copy->sys_chunk_array;
1073 cur = 0;
1074
1075 while (cur < array_size) {
1076 disk_key = (struct btrfs_disk_key *)ptr;
1077 btrfs_disk_key_to_cpu(&key, disk_key);
1078
1079 len = sizeof(*disk_key);
1080
1081 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1082 chunk = (struct btrfs_chunk *)(ptr + len);
1083 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1084 len += btrfs_chunk_item_size(num_stripes);
1085 } else {
1086 ret = -EIO;
1087 break;
1088 }
1089 if (key.objectid == chunk_objectid &&
1090 key.offset == chunk_offset) {
1091 memmove(ptr, ptr + len, array_size - (cur + len));
1092 array_size -= len;
1093 btrfs_set_super_sys_array_size(super_copy, array_size);
1094 } else {
1095 ptr += len;
1096 cur += len;
1097 }
1098 }
1099 return ret;
1100}
1101
1102
1103int btrfs_relocate_chunk(struct btrfs_root *root,
1104 u64 chunk_tree, u64 chunk_objectid,
1105 u64 chunk_offset)
1106{
1107 struct extent_map_tree *em_tree;
1108 struct btrfs_root *extent_root;
1109 struct btrfs_trans_handle *trans;
1110 struct extent_map *em;
1111 struct map_lookup *map;
1112 int ret;
1113 int i;
1114
Chris Mason323da792008-05-09 11:46:48 -04001115 printk("btrfs relocating chunk %llu\n",
1116 (unsigned long long)chunk_offset);
Chris Mason8f18cf12008-04-25 16:53:30 -04001117 root = root->fs_info->chunk_root;
1118 extent_root = root->fs_info->extent_root;
1119 em_tree = &root->fs_info->mapping_tree.map_tree;
1120
1121 /* step one, relocate all the extents inside this chunk */
1122 ret = btrfs_shrink_extent_tree(extent_root, chunk_offset);
1123 BUG_ON(ret);
1124
1125 trans = btrfs_start_transaction(root, 1);
1126 BUG_ON(!trans);
1127
1128 /*
1129 * step two, delete the device extents and the
1130 * chunk tree entries
1131 */
1132 spin_lock(&em_tree->lock);
1133 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1134 spin_unlock(&em_tree->lock);
1135
Chris Masona061fc82008-05-07 11:43:44 -04001136 BUG_ON(em->start > chunk_offset ||
1137 em->start + em->len < chunk_offset);
Chris Mason8f18cf12008-04-25 16:53:30 -04001138 map = (struct map_lookup *)em->bdev;
1139
1140 for (i = 0; i < map->num_stripes; i++) {
1141 ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
1142 map->stripes[i].physical);
1143 BUG_ON(ret);
Chris Masona061fc82008-05-07 11:43:44 -04001144
Chris Masondfe25022008-05-13 13:46:40 -04001145 if (map->stripes[i].dev) {
1146 ret = btrfs_update_device(trans, map->stripes[i].dev);
1147 BUG_ON(ret);
1148 }
Chris Mason8f18cf12008-04-25 16:53:30 -04001149 }
1150 ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
1151 chunk_offset);
1152
1153 BUG_ON(ret);
1154
1155 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
1156 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
1157 BUG_ON(ret);
Chris Mason8f18cf12008-04-25 16:53:30 -04001158 }
1159
Chris Mason8f18cf12008-04-25 16:53:30 -04001160 spin_lock(&em_tree->lock);
1161 remove_extent_mapping(em_tree, em);
1162 kfree(map);
1163 em->bdev = NULL;
1164
1165 /* once for the tree */
1166 free_extent_map(em);
1167 spin_unlock(&em_tree->lock);
1168
Chris Mason8f18cf12008-04-25 16:53:30 -04001169 /* once for us */
1170 free_extent_map(em);
1171
1172 btrfs_end_transaction(trans, root);
1173 return 0;
1174}
1175
Chris Masonec44a352008-04-28 15:29:52 -04001176static u64 div_factor(u64 num, int factor)
1177{
1178 if (factor == 10)
1179 return num;
1180 num *= factor;
1181 do_div(num, 10);
1182 return num;
1183}
1184
1185
1186int btrfs_balance(struct btrfs_root *dev_root)
1187{
1188 int ret;
1189 struct list_head *cur;
1190 struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
1191 struct btrfs_device *device;
1192 u64 old_size;
1193 u64 size_to_free;
1194 struct btrfs_path *path;
1195 struct btrfs_key key;
1196 struct btrfs_chunk *chunk;
1197 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
1198 struct btrfs_trans_handle *trans;
1199 struct btrfs_key found_key;
1200
1201
1202 dev_root = dev_root->fs_info->dev_root;
1203
1204 mutex_lock(&dev_root->fs_info->fs_mutex);
1205 /* step one make some room on all the devices */
1206 list_for_each(cur, devices) {
1207 device = list_entry(cur, struct btrfs_device, dev_list);
1208 old_size = device->total_bytes;
1209 size_to_free = div_factor(old_size, 1);
1210 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
1211 if (device->total_bytes - device->bytes_used > size_to_free)
1212 continue;
1213
1214 ret = btrfs_shrink_device(device, old_size - size_to_free);
1215 BUG_ON(ret);
1216
1217 trans = btrfs_start_transaction(dev_root, 1);
1218 BUG_ON(!trans);
1219
1220 ret = btrfs_grow_device(trans, device, old_size);
1221 BUG_ON(ret);
1222
1223 btrfs_end_transaction(trans, dev_root);
1224 }
1225
1226 /* step two, relocate all the chunks */
1227 path = btrfs_alloc_path();
1228 BUG_ON(!path);
1229
1230 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1231 key.offset = (u64)-1;
1232 key.type = BTRFS_CHUNK_ITEM_KEY;
1233
1234 while(1) {
1235 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1236 if (ret < 0)
1237 goto error;
1238
1239 /*
1240 * this shouldn't happen, it means the last relocate
1241 * failed
1242 */
1243 if (ret == 0)
1244 break;
1245
1246 ret = btrfs_previous_item(chunk_root, path, 0,
1247 BTRFS_CHUNK_ITEM_KEY);
1248 if (ret) {
1249 break;
1250 }
1251 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1252 path->slots[0]);
1253 if (found_key.objectid != key.objectid)
1254 break;
1255 chunk = btrfs_item_ptr(path->nodes[0],
1256 path->slots[0],
1257 struct btrfs_chunk);
1258 key.offset = found_key.offset;
1259 /* chunk zero is special */
1260 if (key.offset == 0)
1261 break;
1262
1263 ret = btrfs_relocate_chunk(chunk_root,
1264 chunk_root->root_key.objectid,
1265 found_key.objectid,
1266 found_key.offset);
1267 BUG_ON(ret);
1268 btrfs_release_path(chunk_root, path);
1269 }
1270 ret = 0;
1271error:
1272 btrfs_free_path(path);
1273 mutex_unlock(&dev_root->fs_info->fs_mutex);
1274 return ret;
1275}
1276
Chris Mason8f18cf12008-04-25 16:53:30 -04001277/*
1278 * shrinking a device means finding all of the device extents past
1279 * the new size, and then following the back refs to the chunks.
1280 * The chunk relocation code actually frees the device extent
1281 */
1282int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1283{
1284 struct btrfs_trans_handle *trans;
1285 struct btrfs_root *root = device->dev_root;
1286 struct btrfs_dev_extent *dev_extent = NULL;
1287 struct btrfs_path *path;
1288 u64 length;
1289 u64 chunk_tree;
1290 u64 chunk_objectid;
1291 u64 chunk_offset;
1292 int ret;
1293 int slot;
1294 struct extent_buffer *l;
1295 struct btrfs_key key;
1296 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1297 u64 old_total = btrfs_super_total_bytes(super_copy);
1298 u64 diff = device->total_bytes - new_size;
1299
1300
1301 path = btrfs_alloc_path();
1302 if (!path)
1303 return -ENOMEM;
1304
1305 trans = btrfs_start_transaction(root, 1);
1306 if (!trans) {
1307 ret = -ENOMEM;
1308 goto done;
1309 }
1310
1311 path->reada = 2;
1312
1313 device->total_bytes = new_size;
1314 ret = btrfs_update_device(trans, device);
1315 if (ret) {
1316 btrfs_end_transaction(trans, root);
1317 goto done;
1318 }
1319 WARN_ON(diff > old_total);
1320 btrfs_set_super_total_bytes(super_copy, old_total - diff);
1321 btrfs_end_transaction(trans, root);
1322
1323 key.objectid = device->devid;
1324 key.offset = (u64)-1;
1325 key.type = BTRFS_DEV_EXTENT_KEY;
1326
1327 while (1) {
1328 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1329 if (ret < 0)
1330 goto done;
1331
1332 ret = btrfs_previous_item(root, path, 0, key.type);
1333 if (ret < 0)
1334 goto done;
1335 if (ret) {
1336 ret = 0;
1337 goto done;
1338 }
1339
1340 l = path->nodes[0];
1341 slot = path->slots[0];
1342 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1343
1344 if (key.objectid != device->devid)
1345 goto done;
1346
1347 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1348 length = btrfs_dev_extent_length(l, dev_extent);
1349
1350 if (key.offset + length <= new_size)
1351 goto done;
1352
1353 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1354 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1355 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1356 btrfs_release_path(root, path);
1357
1358 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
1359 chunk_offset);
1360 if (ret)
1361 goto done;
1362 }
1363
1364done:
1365 btrfs_free_path(path);
1366 return ret;
1367}
1368
Chris Mason0b86a832008-03-24 15:01:56 -04001369int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
1370 struct btrfs_root *root,
1371 struct btrfs_key *key,
1372 struct btrfs_chunk *chunk, int item_size)
1373{
1374 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1375 struct btrfs_disk_key disk_key;
1376 u32 array_size;
1377 u8 *ptr;
1378
1379 array_size = btrfs_super_sys_array_size(super_copy);
1380 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
1381 return -EFBIG;
1382
1383 ptr = super_copy->sys_chunk_array + array_size;
1384 btrfs_cpu_key_to_disk(&disk_key, key);
1385 memcpy(ptr, &disk_key, sizeof(disk_key));
1386 ptr += sizeof(disk_key);
1387 memcpy(ptr, chunk, item_size);
1388 item_size += sizeof(disk_key);
1389 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
1390 return 0;
1391}
1392
Chris Mason9b3f68b2008-04-18 10:29:51 -04001393static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
1394 int sub_stripes)
1395{
1396 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
1397 return calc_size;
1398 else if (type & BTRFS_BLOCK_GROUP_RAID10)
1399 return calc_size * (num_stripes / sub_stripes);
1400 else
1401 return calc_size * num_stripes;
1402}
1403
1404
Chris Mason0b86a832008-03-24 15:01:56 -04001405int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1406 struct btrfs_root *extent_root, u64 *start,
Chris Mason6324fbf2008-03-24 15:01:59 -04001407 u64 *num_bytes, u64 type)
Chris Mason0b86a832008-03-24 15:01:56 -04001408{
1409 u64 dev_offset;
Chris Mason593060d2008-03-25 16:50:33 -04001410 struct btrfs_fs_info *info = extent_root->fs_info;
Chris Mason0b86a832008-03-24 15:01:56 -04001411 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
Chris Mason8f18cf12008-04-25 16:53:30 -04001412 struct btrfs_path *path;
Chris Mason0b86a832008-03-24 15:01:56 -04001413 struct btrfs_stripe *stripes;
1414 struct btrfs_device *device = NULL;
1415 struct btrfs_chunk *chunk;
Chris Mason6324fbf2008-03-24 15:01:59 -04001416 struct list_head private_devs;
Chris Masonb3075712008-04-22 09:22:07 -04001417 struct list_head *dev_list;
Chris Mason6324fbf2008-03-24 15:01:59 -04001418 struct list_head *cur;
Chris Mason0b86a832008-03-24 15:01:56 -04001419 struct extent_map_tree *em_tree;
1420 struct map_lookup *map;
1421 struct extent_map *em;
Chris Masona40a90a2008-04-18 11:55:51 -04001422 int min_stripe_size = 1 * 1024 * 1024;
Chris Mason0b86a832008-03-24 15:01:56 -04001423 u64 physical;
1424 u64 calc_size = 1024 * 1024 * 1024;
Chris Mason9b3f68b2008-04-18 10:29:51 -04001425 u64 max_chunk_size = calc_size;
1426 u64 min_free;
Chris Mason6324fbf2008-03-24 15:01:59 -04001427 u64 avail;
1428 u64 max_avail = 0;
Chris Mason9b3f68b2008-04-18 10:29:51 -04001429 u64 percent_max;
Chris Mason6324fbf2008-03-24 15:01:59 -04001430 int num_stripes = 1;
Chris Masona40a90a2008-04-18 11:55:51 -04001431 int min_stripes = 1;
Chris Mason321aecc2008-04-16 10:49:51 -04001432 int sub_stripes = 0;
Chris Mason6324fbf2008-03-24 15:01:59 -04001433 int looped = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04001434 int ret;
Chris Mason6324fbf2008-03-24 15:01:59 -04001435 int index;
Chris Mason593060d2008-03-25 16:50:33 -04001436 int stripe_len = 64 * 1024;
Chris Mason0b86a832008-03-24 15:01:56 -04001437 struct btrfs_key key;
1438
Chris Masonec44a352008-04-28 15:29:52 -04001439 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
1440 (type & BTRFS_BLOCK_GROUP_DUP)) {
1441 WARN_ON(1);
1442 type &= ~BTRFS_BLOCK_GROUP_DUP;
1443 }
Chris Masonb3075712008-04-22 09:22:07 -04001444 dev_list = &extent_root->fs_info->fs_devices->alloc_list;
Chris Mason6324fbf2008-03-24 15:01:59 -04001445 if (list_empty(dev_list))
1446 return -ENOSPC;
Chris Mason593060d2008-03-25 16:50:33 -04001447
Chris Masona40a90a2008-04-18 11:55:51 -04001448 if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
Chris Mason0ef3e662008-05-24 14:04:53 -04001449 num_stripes = extent_root->fs_info->fs_devices->open_devices;
Chris Masona40a90a2008-04-18 11:55:51 -04001450 min_stripes = 2;
1451 }
1452 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
Chris Mason611f0e02008-04-03 16:29:03 -04001453 num_stripes = 2;
Chris Masona40a90a2008-04-18 11:55:51 -04001454 min_stripes = 2;
1455 }
Chris Mason8790d502008-04-03 16:29:03 -04001456 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
1457 num_stripes = min_t(u64, 2,
Chris Mason0ef3e662008-05-24 14:04:53 -04001458 extent_root->fs_info->fs_devices->open_devices);
Chris Mason9b3f68b2008-04-18 10:29:51 -04001459 if (num_stripes < 2)
1460 return -ENOSPC;
Chris Masona40a90a2008-04-18 11:55:51 -04001461 min_stripes = 2;
Chris Mason8790d502008-04-03 16:29:03 -04001462 }
Chris Mason321aecc2008-04-16 10:49:51 -04001463 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
Chris Mason0ef3e662008-05-24 14:04:53 -04001464 num_stripes = extent_root->fs_info->fs_devices->open_devices;
Chris Mason321aecc2008-04-16 10:49:51 -04001465 if (num_stripes < 4)
1466 return -ENOSPC;
1467 num_stripes &= ~(u32)1;
1468 sub_stripes = 2;
Chris Masona40a90a2008-04-18 11:55:51 -04001469 min_stripes = 4;
Chris Mason321aecc2008-04-16 10:49:51 -04001470 }
Chris Mason9b3f68b2008-04-18 10:29:51 -04001471
1472 if (type & BTRFS_BLOCK_GROUP_DATA) {
1473 max_chunk_size = 10 * calc_size;
Chris Masona40a90a2008-04-18 11:55:51 -04001474 min_stripe_size = 64 * 1024 * 1024;
Chris Mason9b3f68b2008-04-18 10:29:51 -04001475 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
1476 max_chunk_size = 4 * calc_size;
Chris Masona40a90a2008-04-18 11:55:51 -04001477 min_stripe_size = 32 * 1024 * 1024;
1478 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
1479 calc_size = 8 * 1024 * 1024;
1480 max_chunk_size = calc_size * 2;
1481 min_stripe_size = 1 * 1024 * 1024;
Chris Mason9b3f68b2008-04-18 10:29:51 -04001482 }
1483
Chris Mason8f18cf12008-04-25 16:53:30 -04001484 path = btrfs_alloc_path();
1485 if (!path)
1486 return -ENOMEM;
1487
Chris Mason9b3f68b2008-04-18 10:29:51 -04001488 /* we don't want a chunk larger than 10% of the FS */
1489 percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
1490 max_chunk_size = min(percent_max, max_chunk_size);
1491
Chris Masona40a90a2008-04-18 11:55:51 -04001492again:
Chris Mason9b3f68b2008-04-18 10:29:51 -04001493 if (calc_size * num_stripes > max_chunk_size) {
1494 calc_size = max_chunk_size;
1495 do_div(calc_size, num_stripes);
1496 do_div(calc_size, stripe_len);
1497 calc_size *= stripe_len;
1498 }
1499 /* we don't want tiny stripes */
Chris Masona40a90a2008-04-18 11:55:51 -04001500 calc_size = max_t(u64, min_stripe_size, calc_size);
Chris Mason9b3f68b2008-04-18 10:29:51 -04001501
Chris Mason9b3f68b2008-04-18 10:29:51 -04001502 do_div(calc_size, stripe_len);
1503 calc_size *= stripe_len;
1504
Chris Mason6324fbf2008-03-24 15:01:59 -04001505 INIT_LIST_HEAD(&private_devs);
1506 cur = dev_list->next;
1507 index = 0;
Chris Mason611f0e02008-04-03 16:29:03 -04001508
1509 if (type & BTRFS_BLOCK_GROUP_DUP)
1510 min_free = calc_size * 2;
Chris Mason9b3f68b2008-04-18 10:29:51 -04001511 else
1512 min_free = calc_size;
Chris Mason611f0e02008-04-03 16:29:03 -04001513
Chris Masonad5bd912008-04-21 08:28:10 -04001514 /* we add 1MB because we never use the first 1MB of the device */
1515 min_free += 1024 * 1024;
1516
Chris Mason6324fbf2008-03-24 15:01:59 -04001517 /* build a private list of devices we will allocate from */
1518 while(index < num_stripes) {
Chris Masonb3075712008-04-22 09:22:07 -04001519 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
Chris Mason611f0e02008-04-03 16:29:03 -04001520
Chris Masondfe25022008-05-13 13:46:40 -04001521 if (device->total_bytes > device->bytes_used)
1522 avail = device->total_bytes - device->bytes_used;
1523 else
1524 avail = 0;
Chris Mason6324fbf2008-03-24 15:01:59 -04001525 cur = cur->next;
Chris Mason8f18cf12008-04-25 16:53:30 -04001526
Chris Masondfe25022008-05-13 13:46:40 -04001527 if (device->in_fs_metadata && avail >= min_free) {
Chris Mason8f18cf12008-04-25 16:53:30 -04001528 u64 ignored_start = 0;
1529 ret = find_free_dev_extent(trans, device, path,
1530 min_free,
1531 &ignored_start);
1532 if (ret == 0) {
1533 list_move_tail(&device->dev_alloc_list,
1534 &private_devs);
Chris Mason611f0e02008-04-03 16:29:03 -04001535 index++;
Chris Mason8f18cf12008-04-25 16:53:30 -04001536 if (type & BTRFS_BLOCK_GROUP_DUP)
1537 index++;
1538 }
Chris Masondfe25022008-05-13 13:46:40 -04001539 } else if (device->in_fs_metadata && avail > max_avail)
Chris Masona40a90a2008-04-18 11:55:51 -04001540 max_avail = avail;
Chris Mason6324fbf2008-03-24 15:01:59 -04001541 if (cur == dev_list)
1542 break;
1543 }
1544 if (index < num_stripes) {
1545 list_splice(&private_devs, dev_list);
Chris Masona40a90a2008-04-18 11:55:51 -04001546 if (index >= min_stripes) {
1547 num_stripes = index;
1548 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
1549 num_stripes /= sub_stripes;
1550 num_stripes *= sub_stripes;
1551 }
1552 looped = 1;
1553 goto again;
1554 }
Chris Mason6324fbf2008-03-24 15:01:59 -04001555 if (!looped && max_avail > 0) {
1556 looped = 1;
1557 calc_size = max_avail;
1558 goto again;
1559 }
Chris Mason8f18cf12008-04-25 16:53:30 -04001560 btrfs_free_path(path);
Chris Mason6324fbf2008-03-24 15:01:59 -04001561 return -ENOSPC;
1562 }
Chris Masone17cade2008-04-15 15:41:47 -04001563 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1564 key.type = BTRFS_CHUNK_ITEM_KEY;
1565 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1566 &key.offset);
Chris Mason8f18cf12008-04-25 16:53:30 -04001567 if (ret) {
1568 btrfs_free_path(path);
Chris Mason0b86a832008-03-24 15:01:56 -04001569 return ret;
Chris Mason8f18cf12008-04-25 16:53:30 -04001570 }
Chris Mason0b86a832008-03-24 15:01:56 -04001571
Chris Mason0b86a832008-03-24 15:01:56 -04001572 chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
Chris Mason8f18cf12008-04-25 16:53:30 -04001573 if (!chunk) {
1574 btrfs_free_path(path);
Chris Mason0b86a832008-03-24 15:01:56 -04001575 return -ENOMEM;
Chris Mason8f18cf12008-04-25 16:53:30 -04001576 }
Chris Mason0b86a832008-03-24 15:01:56 -04001577
Chris Mason593060d2008-03-25 16:50:33 -04001578 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
1579 if (!map) {
1580 kfree(chunk);
Chris Mason8f18cf12008-04-25 16:53:30 -04001581 btrfs_free_path(path);
Chris Mason593060d2008-03-25 16:50:33 -04001582 return -ENOMEM;
1583 }
Chris Mason8f18cf12008-04-25 16:53:30 -04001584 btrfs_free_path(path);
1585 path = NULL;
Chris Mason593060d2008-03-25 16:50:33 -04001586
Chris Mason0b86a832008-03-24 15:01:56 -04001587 stripes = &chunk->stripe;
Chris Mason9b3f68b2008-04-18 10:29:51 -04001588 *num_bytes = chunk_bytes_by_type(type, calc_size,
1589 num_stripes, sub_stripes);
Chris Mason0b86a832008-03-24 15:01:56 -04001590
Chris Mason6324fbf2008-03-24 15:01:59 -04001591 index = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04001592 while(index < num_stripes) {
Chris Masone17cade2008-04-15 15:41:47 -04001593 struct btrfs_stripe *stripe;
Chris Mason6324fbf2008-03-24 15:01:59 -04001594 BUG_ON(list_empty(&private_devs));
1595 cur = private_devs.next;
Chris Masonb3075712008-04-22 09:22:07 -04001596 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
Chris Mason611f0e02008-04-03 16:29:03 -04001597
1598 /* loop over this device again if we're doing a dup group */
1599 if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
1600 (index == num_stripes - 1))
Chris Masonb3075712008-04-22 09:22:07 -04001601 list_move_tail(&device->dev_alloc_list, dev_list);
Chris Mason0b86a832008-03-24 15:01:56 -04001602
1603 ret = btrfs_alloc_dev_extent(trans, device,
Chris Masone17cade2008-04-15 15:41:47 -04001604 info->chunk_root->root_key.objectid,
1605 BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
1606 calc_size, &dev_offset);
Chris Mason0b86a832008-03-24 15:01:56 -04001607 BUG_ON(ret);
Chris Mason0b86a832008-03-24 15:01:56 -04001608 device->bytes_used += calc_size;
1609 ret = btrfs_update_device(trans, device);
1610 BUG_ON(ret);
1611
Chris Mason593060d2008-03-25 16:50:33 -04001612 map->stripes[index].dev = device;
1613 map->stripes[index].physical = dev_offset;
Chris Masone17cade2008-04-15 15:41:47 -04001614 stripe = stripes + index;
1615 btrfs_set_stack_stripe_devid(stripe, device->devid);
1616 btrfs_set_stack_stripe_offset(stripe, dev_offset);
1617 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
Chris Mason0b86a832008-03-24 15:01:56 -04001618 physical = dev_offset;
1619 index++;
1620 }
Chris Mason6324fbf2008-03-24 15:01:59 -04001621 BUG_ON(!list_empty(&private_devs));
Chris Mason0b86a832008-03-24 15:01:56 -04001622
Chris Masone17cade2008-04-15 15:41:47 -04001623 /* key was set above */
1624 btrfs_set_stack_chunk_length(chunk, *num_bytes);
Chris Mason0b86a832008-03-24 15:01:56 -04001625 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
Chris Mason593060d2008-03-25 16:50:33 -04001626 btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
Chris Mason0b86a832008-03-24 15:01:56 -04001627 btrfs_set_stack_chunk_type(chunk, type);
1628 btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
Chris Mason593060d2008-03-25 16:50:33 -04001629 btrfs_set_stack_chunk_io_align(chunk, stripe_len);
1630 btrfs_set_stack_chunk_io_width(chunk, stripe_len);
Chris Mason0b86a832008-03-24 15:01:56 -04001631 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
Chris Mason321aecc2008-04-16 10:49:51 -04001632 btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
Chris Mason593060d2008-03-25 16:50:33 -04001633 map->sector_size = extent_root->sectorsize;
1634 map->stripe_len = stripe_len;
1635 map->io_align = stripe_len;
1636 map->io_width = stripe_len;
1637 map->type = type;
1638 map->num_stripes = num_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04001639 map->sub_stripes = sub_stripes;
Chris Mason0b86a832008-03-24 15:01:56 -04001640
1641 ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
1642 btrfs_chunk_item_size(num_stripes));
1643 BUG_ON(ret);
Chris Masone17cade2008-04-15 15:41:47 -04001644 *start = key.offset;;
Chris Mason0b86a832008-03-24 15:01:56 -04001645
1646 em = alloc_extent_map(GFP_NOFS);
1647 if (!em)
1648 return -ENOMEM;
Chris Mason0b86a832008-03-24 15:01:56 -04001649 em->bdev = (struct block_device *)map;
Chris Masone17cade2008-04-15 15:41:47 -04001650 em->start = key.offset;
1651 em->len = *num_bytes;
Chris Mason0b86a832008-03-24 15:01:56 -04001652 em->block_start = 0;
1653
Chris Mason8f18cf12008-04-25 16:53:30 -04001654 if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
1655 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
1656 chunk, btrfs_chunk_item_size(num_stripes));
1657 BUG_ON(ret);
1658 }
Chris Mason0b86a832008-03-24 15:01:56 -04001659 kfree(chunk);
1660
1661 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
1662 spin_lock(&em_tree->lock);
1663 ret = add_extent_mapping(em_tree, em);
Chris Mason0b86a832008-03-24 15:01:56 -04001664 spin_unlock(&em_tree->lock);
Chris Masonb248a412008-04-14 09:48:18 -04001665 BUG_ON(ret);
Chris Mason0b86a832008-03-24 15:01:56 -04001666 free_extent_map(em);
1667 return ret;
1668}
1669
1670void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
1671{
1672 extent_map_tree_init(&tree->map_tree, GFP_NOFS);
1673}
1674
1675void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
1676{
1677 struct extent_map *em;
1678
1679 while(1) {
1680 spin_lock(&tree->map_tree.lock);
1681 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
1682 if (em)
1683 remove_extent_mapping(&tree->map_tree, em);
1684 spin_unlock(&tree->map_tree.lock);
1685 if (!em)
1686 break;
1687 kfree(em->bdev);
1688 /* once for us */
1689 free_extent_map(em);
1690 /* once for the tree */
1691 free_extent_map(em);
1692 }
1693}
1694
Chris Masonf1885912008-04-09 16:28:12 -04001695int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
1696{
1697 struct extent_map *em;
1698 struct map_lookup *map;
1699 struct extent_map_tree *em_tree = &map_tree->map_tree;
1700 int ret;
1701
1702 spin_lock(&em_tree->lock);
1703 em = lookup_extent_mapping(em_tree, logical, len);
Chris Masonb248a412008-04-14 09:48:18 -04001704 spin_unlock(&em_tree->lock);
Chris Masonf1885912008-04-09 16:28:12 -04001705 BUG_ON(!em);
1706
1707 BUG_ON(em->start > logical || em->start + em->len < logical);
1708 map = (struct map_lookup *)em->bdev;
1709 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
1710 ret = map->num_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04001711 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
1712 ret = map->sub_stripes;
Chris Masonf1885912008-04-09 16:28:12 -04001713 else
1714 ret = 1;
1715 free_extent_map(em);
Chris Masonf1885912008-04-09 16:28:12 -04001716 return ret;
1717}
1718
Chris Masondfe25022008-05-13 13:46:40 -04001719static int find_live_mirror(struct map_lookup *map, int first, int num,
1720 int optimal)
1721{
1722 int i;
1723 if (map->stripes[optimal].dev->bdev)
1724 return optimal;
1725 for (i = first; i < first + num; i++) {
1726 if (map->stripes[i].dev->bdev)
1727 return i;
1728 }
1729 /* we couldn't find one that doesn't fail. Just return something
1730 * and the io error handling code will clean up eventually
1731 */
1732 return optimal;
1733}
1734
Chris Masonf2d8d742008-04-21 10:03:05 -04001735static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
1736 u64 logical, u64 *length,
1737 struct btrfs_multi_bio **multi_ret,
1738 int mirror_num, struct page *unplug_page)
Chris Mason0b86a832008-03-24 15:01:56 -04001739{
1740 struct extent_map *em;
1741 struct map_lookup *map;
1742 struct extent_map_tree *em_tree = &map_tree->map_tree;
1743 u64 offset;
Chris Mason593060d2008-03-25 16:50:33 -04001744 u64 stripe_offset;
1745 u64 stripe_nr;
Chris Masoncea9e442008-04-09 16:28:12 -04001746 int stripes_allocated = 8;
Chris Mason321aecc2008-04-16 10:49:51 -04001747 int stripes_required = 1;
Chris Mason593060d2008-03-25 16:50:33 -04001748 int stripe_index;
Chris Masoncea9e442008-04-09 16:28:12 -04001749 int i;
Chris Masonf2d8d742008-04-21 10:03:05 -04001750 int num_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04001751 int max_errors = 0;
Chris Masoncea9e442008-04-09 16:28:12 -04001752 struct btrfs_multi_bio *multi = NULL;
Chris Mason0b86a832008-03-24 15:01:56 -04001753
Chris Masoncea9e442008-04-09 16:28:12 -04001754 if (multi_ret && !(rw & (1 << BIO_RW))) {
1755 stripes_allocated = 1;
1756 }
1757again:
1758 if (multi_ret) {
1759 multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
1760 GFP_NOFS);
1761 if (!multi)
1762 return -ENOMEM;
Chris Masona236aed2008-04-29 09:38:00 -04001763
1764 atomic_set(&multi->error, 0);
Chris Masoncea9e442008-04-09 16:28:12 -04001765 }
Chris Mason0b86a832008-03-24 15:01:56 -04001766
1767 spin_lock(&em_tree->lock);
1768 em = lookup_extent_mapping(em_tree, logical, *length);
Chris Masonb248a412008-04-14 09:48:18 -04001769 spin_unlock(&em_tree->lock);
Chris Masonf2d8d742008-04-21 10:03:05 -04001770
1771 if (!em && unplug_page)
1772 return 0;
1773
Chris Mason3b951512008-04-17 11:29:12 -04001774 if (!em) {
Chris Masona061fc82008-05-07 11:43:44 -04001775 printk("unable to find logical %Lu len %Lu\n", logical, *length);
Chris Masonf2d8d742008-04-21 10:03:05 -04001776 BUG();
Chris Mason3b951512008-04-17 11:29:12 -04001777 }
Chris Mason0b86a832008-03-24 15:01:56 -04001778
1779 BUG_ON(em->start > logical || em->start + em->len < logical);
1780 map = (struct map_lookup *)em->bdev;
1781 offset = logical - em->start;
Chris Mason593060d2008-03-25 16:50:33 -04001782
Chris Masonf1885912008-04-09 16:28:12 -04001783 if (mirror_num > map->num_stripes)
1784 mirror_num = 0;
1785
Chris Masoncea9e442008-04-09 16:28:12 -04001786 /* if our multi bio struct is too small, back off and try again */
Chris Mason321aecc2008-04-16 10:49:51 -04001787 if (rw & (1 << BIO_RW)) {
1788 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
1789 BTRFS_BLOCK_GROUP_DUP)) {
1790 stripes_required = map->num_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04001791 max_errors = 1;
Chris Mason321aecc2008-04-16 10:49:51 -04001792 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1793 stripes_required = map->sub_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04001794 max_errors = 1;
Chris Mason321aecc2008-04-16 10:49:51 -04001795 }
1796 }
1797 if (multi_ret && rw == WRITE &&
1798 stripes_allocated < stripes_required) {
Chris Masoncea9e442008-04-09 16:28:12 -04001799 stripes_allocated = map->num_stripes;
Chris Masoncea9e442008-04-09 16:28:12 -04001800 free_extent_map(em);
1801 kfree(multi);
1802 goto again;
1803 }
Chris Mason593060d2008-03-25 16:50:33 -04001804 stripe_nr = offset;
1805 /*
1806 * stripe_nr counts the total number of stripes we have to stride
1807 * to get to this block
1808 */
1809 do_div(stripe_nr, map->stripe_len);
1810
1811 stripe_offset = stripe_nr * map->stripe_len;
1812 BUG_ON(offset < stripe_offset);
1813
1814 /* stripe_offset is the offset of this block in its stripe*/
1815 stripe_offset = offset - stripe_offset;
1816
Chris Masoncea9e442008-04-09 16:28:12 -04001817 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
Chris Mason321aecc2008-04-16 10:49:51 -04001818 BTRFS_BLOCK_GROUP_RAID10 |
Chris Masoncea9e442008-04-09 16:28:12 -04001819 BTRFS_BLOCK_GROUP_DUP)) {
1820 /* we limit the length of each bio to what fits in a stripe */
1821 *length = min_t(u64, em->len - offset,
1822 map->stripe_len - stripe_offset);
1823 } else {
1824 *length = em->len - offset;
1825 }
Chris Masonf2d8d742008-04-21 10:03:05 -04001826
1827 if (!multi_ret && !unplug_page)
Chris Masoncea9e442008-04-09 16:28:12 -04001828 goto out;
1829
Chris Masonf2d8d742008-04-21 10:03:05 -04001830 num_stripes = 1;
Chris Masoncea9e442008-04-09 16:28:12 -04001831 stripe_index = 0;
Chris Mason8790d502008-04-03 16:29:03 -04001832 if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
Chris Masonf2d8d742008-04-21 10:03:05 -04001833 if (unplug_page || (rw & (1 << BIO_RW)))
1834 num_stripes = map->num_stripes;
Chris Mason2fff7342008-04-29 14:12:09 -04001835 else if (mirror_num)
Chris Masonf1885912008-04-09 16:28:12 -04001836 stripe_index = mirror_num - 1;
Chris Masondfe25022008-05-13 13:46:40 -04001837 else {
1838 stripe_index = find_live_mirror(map, 0,
1839 map->num_stripes,
1840 current->pid % map->num_stripes);
1841 }
Chris Mason2fff7342008-04-29 14:12:09 -04001842
Chris Mason611f0e02008-04-03 16:29:03 -04001843 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
Chris Masoncea9e442008-04-09 16:28:12 -04001844 if (rw & (1 << BIO_RW))
Chris Masonf2d8d742008-04-21 10:03:05 -04001845 num_stripes = map->num_stripes;
Chris Masonf1885912008-04-09 16:28:12 -04001846 else if (mirror_num)
1847 stripe_index = mirror_num - 1;
Chris Mason2fff7342008-04-29 14:12:09 -04001848
Chris Mason321aecc2008-04-16 10:49:51 -04001849 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1850 int factor = map->num_stripes / map->sub_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04001851
1852 stripe_index = do_div(stripe_nr, factor);
1853 stripe_index *= map->sub_stripes;
1854
Chris Masonf2d8d742008-04-21 10:03:05 -04001855 if (unplug_page || (rw & (1 << BIO_RW)))
1856 num_stripes = map->sub_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04001857 else if (mirror_num)
1858 stripe_index += mirror_num - 1;
Chris Masondfe25022008-05-13 13:46:40 -04001859 else {
1860 stripe_index = find_live_mirror(map, stripe_index,
1861 map->sub_stripes, stripe_index +
1862 current->pid % map->sub_stripes);
1863 }
Chris Mason8790d502008-04-03 16:29:03 -04001864 } else {
1865 /*
1866 * after this do_div call, stripe_nr is the number of stripes
1867 * on this device we have to walk to find the data, and
1868 * stripe_index is the number of our device in the stripe array
1869 */
1870 stripe_index = do_div(stripe_nr, map->num_stripes);
1871 }
Chris Mason593060d2008-03-25 16:50:33 -04001872 BUG_ON(stripe_index >= map->num_stripes);
Chris Mason593060d2008-03-25 16:50:33 -04001873
Chris Masonf2d8d742008-04-21 10:03:05 -04001874 for (i = 0; i < num_stripes; i++) {
1875 if (unplug_page) {
1876 struct btrfs_device *device;
1877 struct backing_dev_info *bdi;
1878
1879 device = map->stripes[stripe_index].dev;
Chris Masondfe25022008-05-13 13:46:40 -04001880 if (device->bdev) {
1881 bdi = blk_get_backing_dev_info(device->bdev);
1882 if (bdi->unplug_io_fn) {
1883 bdi->unplug_io_fn(bdi, unplug_page);
1884 }
Chris Masonf2d8d742008-04-21 10:03:05 -04001885 }
1886 } else {
1887 multi->stripes[i].physical =
1888 map->stripes[stripe_index].physical +
1889 stripe_offset + stripe_nr * map->stripe_len;
1890 multi->stripes[i].dev = map->stripes[stripe_index].dev;
1891 }
Chris Masoncea9e442008-04-09 16:28:12 -04001892 stripe_index++;
Chris Mason593060d2008-03-25 16:50:33 -04001893 }
Chris Masonf2d8d742008-04-21 10:03:05 -04001894 if (multi_ret) {
1895 *multi_ret = multi;
1896 multi->num_stripes = num_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04001897 multi->max_errors = max_errors;
Chris Masonf2d8d742008-04-21 10:03:05 -04001898 }
Chris Masoncea9e442008-04-09 16:28:12 -04001899out:
Chris Mason0b86a832008-03-24 15:01:56 -04001900 free_extent_map(em);
Chris Mason0b86a832008-03-24 15:01:56 -04001901 return 0;
1902}
1903
Chris Masonf2d8d742008-04-21 10:03:05 -04001904int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
1905 u64 logical, u64 *length,
1906 struct btrfs_multi_bio **multi_ret, int mirror_num)
1907{
1908 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
1909 mirror_num, NULL);
1910}
1911
1912int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
1913 u64 logical, struct page *page)
1914{
1915 u64 length = PAGE_CACHE_SIZE;
1916 return __btrfs_map_block(map_tree, READ, logical, &length,
1917 NULL, 0, page);
1918}
1919
1920
Chris Mason8790d502008-04-03 16:29:03 -04001921#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1922static void end_bio_multi_stripe(struct bio *bio, int err)
1923#else
1924static int end_bio_multi_stripe(struct bio *bio,
1925 unsigned int bytes_done, int err)
1926#endif
1927{
Chris Masoncea9e442008-04-09 16:28:12 -04001928 struct btrfs_multi_bio *multi = bio->bi_private;
Chris Mason8790d502008-04-03 16:29:03 -04001929
1930#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1931 if (bio->bi_size)
1932 return 1;
1933#endif
1934 if (err)
Chris Masona236aed2008-04-29 09:38:00 -04001935 atomic_inc(&multi->error);
Chris Mason8790d502008-04-03 16:29:03 -04001936
Chris Masoncea9e442008-04-09 16:28:12 -04001937 if (atomic_dec_and_test(&multi->stripes_pending)) {
Chris Mason8790d502008-04-03 16:29:03 -04001938 bio->bi_private = multi->private;
1939 bio->bi_end_io = multi->end_io;
Chris Masona236aed2008-04-29 09:38:00 -04001940 /* only send an error to the higher layers if it is
1941 * beyond the tolerance of the multi-bio
1942 */
Chris Mason1259ab72008-05-12 13:39:03 -04001943 if (atomic_read(&multi->error) > multi->max_errors) {
Chris Masona236aed2008-04-29 09:38:00 -04001944 err = -EIO;
Chris Mason1259ab72008-05-12 13:39:03 -04001945 } else if (err) {
1946 /*
1947 * this bio is actually up to date, we didn't
1948 * go over the max number of errors
1949 */
1950 set_bit(BIO_UPTODATE, &bio->bi_flags);
Chris Masona236aed2008-04-29 09:38:00 -04001951 err = 0;
Chris Mason1259ab72008-05-12 13:39:03 -04001952 }
Chris Mason8790d502008-04-03 16:29:03 -04001953 kfree(multi);
1954
Miguel73f61b22008-04-11 15:50:59 -04001955#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1956 bio_endio(bio, bio->bi_size, err);
1957#else
Chris Mason8790d502008-04-03 16:29:03 -04001958 bio_endio(bio, err);
Miguel73f61b22008-04-11 15:50:59 -04001959#endif
Chris Mason8790d502008-04-03 16:29:03 -04001960 } else {
1961 bio_put(bio);
1962 }
1963#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1964 return 0;
1965#endif
1966}
1967
Chris Masonf1885912008-04-09 16:28:12 -04001968int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
1969 int mirror_num)
Chris Mason0b86a832008-03-24 15:01:56 -04001970{
1971 struct btrfs_mapping_tree *map_tree;
1972 struct btrfs_device *dev;
Chris Mason8790d502008-04-03 16:29:03 -04001973 struct bio *first_bio = bio;
Chris Mason0b86a832008-03-24 15:01:56 -04001974 u64 logical = bio->bi_sector << 9;
Chris Mason0b86a832008-03-24 15:01:56 -04001975 u64 length = 0;
1976 u64 map_length;
Chris Masoncea9e442008-04-09 16:28:12 -04001977 struct btrfs_multi_bio *multi = NULL;
Chris Mason0b86a832008-03-24 15:01:56 -04001978 int ret;
Chris Mason8790d502008-04-03 16:29:03 -04001979 int dev_nr = 0;
1980 int total_devs = 1;
Chris Mason0b86a832008-03-24 15:01:56 -04001981
Chris Masonf2d8d742008-04-21 10:03:05 -04001982 length = bio->bi_size;
Chris Mason0b86a832008-03-24 15:01:56 -04001983 map_tree = &root->fs_info->mapping_tree;
1984 map_length = length;
Chris Masoncea9e442008-04-09 16:28:12 -04001985
Chris Masonf1885912008-04-09 16:28:12 -04001986 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
1987 mirror_num);
Chris Masoncea9e442008-04-09 16:28:12 -04001988 BUG_ON(ret);
1989
1990 total_devs = multi->num_stripes;
1991 if (map_length < length) {
1992 printk("mapping failed logical %Lu bio len %Lu "
1993 "len %Lu\n", logical, length, map_length);
1994 BUG();
1995 }
1996 multi->end_io = first_bio->bi_end_io;
1997 multi->private = first_bio->bi_private;
1998 atomic_set(&multi->stripes_pending, multi->num_stripes);
1999
Chris Mason8790d502008-04-03 16:29:03 -04002000 while(dev_nr < total_devs) {
Chris Mason8790d502008-04-03 16:29:03 -04002001 if (total_devs > 1) {
Chris Mason8790d502008-04-03 16:29:03 -04002002 if (dev_nr < total_devs - 1) {
2003 bio = bio_clone(first_bio, GFP_NOFS);
2004 BUG_ON(!bio);
2005 } else {
2006 bio = first_bio;
2007 }
2008 bio->bi_private = multi;
2009 bio->bi_end_io = end_bio_multi_stripe;
2010 }
Chris Masoncea9e442008-04-09 16:28:12 -04002011 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
2012 dev = multi->stripes[dev_nr].dev;
Chris Masondfe25022008-05-13 13:46:40 -04002013 if (dev && dev->bdev) {
2014 bio->bi_bdev = dev->bdev;
2015 spin_lock(&dev->io_lock);
2016 dev->total_ios++;
2017 spin_unlock(&dev->io_lock);
2018 submit_bio(rw, bio);
2019 } else {
2020 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
2021 bio->bi_sector = logical >> 9;
2022#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
2023 bio_endio(bio, bio->bi_size, -EIO);
2024#else
2025 bio_endio(bio, -EIO);
2026#endif
2027 }
Chris Mason8790d502008-04-03 16:29:03 -04002028 dev_nr++;
Chris Mason239b14b2008-03-24 15:02:07 -04002029 }
Chris Masoncea9e442008-04-09 16:28:12 -04002030 if (total_devs == 1)
2031 kfree(multi);
Chris Mason0b86a832008-03-24 15:01:56 -04002032 return 0;
2033}
2034
Chris Masona4437552008-04-18 10:29:38 -04002035struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
2036 u8 *uuid)
Chris Mason0b86a832008-03-24 15:01:56 -04002037{
Chris Mason8a4b83c2008-03-24 15:02:07 -04002038 struct list_head *head = &root->fs_info->fs_devices->devices;
Chris Mason0b86a832008-03-24 15:01:56 -04002039
Chris Masona4437552008-04-18 10:29:38 -04002040 return __find_device(head, devid, uuid);
Chris Mason0b86a832008-03-24 15:01:56 -04002041}
2042
Chris Masondfe25022008-05-13 13:46:40 -04002043static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2044 u64 devid, u8 *dev_uuid)
2045{
2046 struct btrfs_device *device;
2047 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2048
2049 device = kzalloc(sizeof(*device), GFP_NOFS);
2050 list_add(&device->dev_list,
2051 &fs_devices->devices);
2052 list_add(&device->dev_alloc_list,
2053 &fs_devices->alloc_list);
2054 device->barriers = 1;
2055 device->dev_root = root->fs_info->dev_root;
2056 device->devid = devid;
2057 fs_devices->num_devices++;
2058 spin_lock_init(&device->io_lock);
2059 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
2060 return device;
2061}
2062
2063
Chris Mason0b86a832008-03-24 15:01:56 -04002064static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2065 struct extent_buffer *leaf,
2066 struct btrfs_chunk *chunk)
2067{
2068 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2069 struct map_lookup *map;
2070 struct extent_map *em;
2071 u64 logical;
2072 u64 length;
2073 u64 devid;
Chris Masona4437552008-04-18 10:29:38 -04002074 u8 uuid[BTRFS_UUID_SIZE];
Chris Mason593060d2008-03-25 16:50:33 -04002075 int num_stripes;
Chris Mason0b86a832008-03-24 15:01:56 -04002076 int ret;
Chris Mason593060d2008-03-25 16:50:33 -04002077 int i;
Chris Mason0b86a832008-03-24 15:01:56 -04002078
Chris Masone17cade2008-04-15 15:41:47 -04002079 logical = key->offset;
2080 length = btrfs_chunk_length(leaf, chunk);
Chris Masona061fc82008-05-07 11:43:44 -04002081
Chris Mason0b86a832008-03-24 15:01:56 -04002082 spin_lock(&map_tree->map_tree.lock);
2083 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
Chris Masonb248a412008-04-14 09:48:18 -04002084 spin_unlock(&map_tree->map_tree.lock);
Chris Mason0b86a832008-03-24 15:01:56 -04002085
2086 /* already mapped? */
2087 if (em && em->start <= logical && em->start + em->len > logical) {
2088 free_extent_map(em);
Chris Mason0b86a832008-03-24 15:01:56 -04002089 return 0;
2090 } else if (em) {
2091 free_extent_map(em);
2092 }
Chris Mason0b86a832008-03-24 15:01:56 -04002093
2094 map = kzalloc(sizeof(*map), GFP_NOFS);
2095 if (!map)
2096 return -ENOMEM;
2097
2098 em = alloc_extent_map(GFP_NOFS);
2099 if (!em)
2100 return -ENOMEM;
Chris Mason593060d2008-03-25 16:50:33 -04002101 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2102 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
Chris Mason0b86a832008-03-24 15:01:56 -04002103 if (!map) {
2104 free_extent_map(em);
2105 return -ENOMEM;
2106 }
2107
2108 em->bdev = (struct block_device *)map;
2109 em->start = logical;
2110 em->len = length;
2111 em->block_start = 0;
2112
Chris Mason593060d2008-03-25 16:50:33 -04002113 map->num_stripes = num_stripes;
2114 map->io_width = btrfs_chunk_io_width(leaf, chunk);
2115 map->io_align = btrfs_chunk_io_align(leaf, chunk);
2116 map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
2117 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
2118 map->type = btrfs_chunk_type(leaf, chunk);
Chris Mason321aecc2008-04-16 10:49:51 -04002119 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
Chris Mason593060d2008-03-25 16:50:33 -04002120 for (i = 0; i < num_stripes; i++) {
2121 map->stripes[i].physical =
2122 btrfs_stripe_offset_nr(leaf, chunk, i);
2123 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
Chris Masona4437552008-04-18 10:29:38 -04002124 read_extent_buffer(leaf, uuid, (unsigned long)
2125 btrfs_stripe_dev_uuid_nr(chunk, i),
2126 BTRFS_UUID_SIZE);
2127 map->stripes[i].dev = btrfs_find_device(root, devid, uuid);
Chris Masondfe25022008-05-13 13:46:40 -04002128
2129 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
Chris Mason593060d2008-03-25 16:50:33 -04002130 kfree(map);
2131 free_extent_map(em);
2132 return -EIO;
2133 }
Chris Masondfe25022008-05-13 13:46:40 -04002134 if (!map->stripes[i].dev) {
2135 map->stripes[i].dev =
2136 add_missing_dev(root, devid, uuid);
2137 if (!map->stripes[i].dev) {
2138 kfree(map);
2139 free_extent_map(em);
2140 return -EIO;
2141 }
2142 }
2143 map->stripes[i].dev->in_fs_metadata = 1;
Chris Mason0b86a832008-03-24 15:01:56 -04002144 }
2145
2146 spin_lock(&map_tree->map_tree.lock);
2147 ret = add_extent_mapping(&map_tree->map_tree, em);
Chris Mason0b86a832008-03-24 15:01:56 -04002148 spin_unlock(&map_tree->map_tree.lock);
Chris Masonb248a412008-04-14 09:48:18 -04002149 BUG_ON(ret);
Chris Mason0b86a832008-03-24 15:01:56 -04002150 free_extent_map(em);
2151
2152 return 0;
2153}
2154
2155static int fill_device_from_item(struct extent_buffer *leaf,
2156 struct btrfs_dev_item *dev_item,
2157 struct btrfs_device *device)
2158{
2159 unsigned long ptr;
Chris Mason0b86a832008-03-24 15:01:56 -04002160
2161 device->devid = btrfs_device_id(leaf, dev_item);
2162 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2163 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2164 device->type = btrfs_device_type(leaf, dev_item);
2165 device->io_align = btrfs_device_io_align(leaf, dev_item);
2166 device->io_width = btrfs_device_io_width(leaf, dev_item);
2167 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
Chris Mason0b86a832008-03-24 15:01:56 -04002168
2169 ptr = (unsigned long)btrfs_device_uuid(dev_item);
Chris Masone17cade2008-04-15 15:41:47 -04002170 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
Chris Mason0b86a832008-03-24 15:01:56 -04002171
Chris Mason0b86a832008-03-24 15:01:56 -04002172 return 0;
2173}
2174
Chris Mason0d81ba52008-03-24 15:02:07 -04002175static int read_one_dev(struct btrfs_root *root,
Chris Mason0b86a832008-03-24 15:01:56 -04002176 struct extent_buffer *leaf,
2177 struct btrfs_dev_item *dev_item)
2178{
2179 struct btrfs_device *device;
2180 u64 devid;
2181 int ret;
Chris Masona4437552008-04-18 10:29:38 -04002182 u8 dev_uuid[BTRFS_UUID_SIZE];
2183
Chris Mason0b86a832008-03-24 15:01:56 -04002184 devid = btrfs_device_id(leaf, dev_item);
Chris Masona4437552008-04-18 10:29:38 -04002185 read_extent_buffer(leaf, dev_uuid,
2186 (unsigned long)btrfs_device_uuid(dev_item),
2187 BTRFS_UUID_SIZE);
2188 device = btrfs_find_device(root, devid, dev_uuid);
Chris Mason6324fbf2008-03-24 15:01:59 -04002189 if (!device) {
Chris Masondfe25022008-05-13 13:46:40 -04002190 printk("warning devid %Lu missing\n", devid);
2191 device = add_missing_dev(root, devid, dev_uuid);
Chris Mason6324fbf2008-03-24 15:01:59 -04002192 if (!device)
2193 return -ENOMEM;
Chris Mason6324fbf2008-03-24 15:01:59 -04002194 }
Chris Mason0b86a832008-03-24 15:01:56 -04002195
2196 fill_device_from_item(leaf, dev_item, device);
2197 device->dev_root = root->fs_info->dev_root;
Chris Masondfe25022008-05-13 13:46:40 -04002198 device->in_fs_metadata = 1;
Chris Mason0b86a832008-03-24 15:01:56 -04002199 ret = 0;
2200#if 0
2201 ret = btrfs_open_device(device);
2202 if (ret) {
2203 kfree(device);
2204 }
2205#endif
2206 return ret;
2207}
2208
Chris Mason0d81ba52008-03-24 15:02:07 -04002209int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
2210{
2211 struct btrfs_dev_item *dev_item;
2212
2213 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
2214 dev_item);
2215 return read_one_dev(root, buf, dev_item);
2216}
2217
Chris Mason0b86a832008-03-24 15:01:56 -04002218int btrfs_read_sys_array(struct btrfs_root *root)
2219{
2220 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
Chris Masona061fc82008-05-07 11:43:44 -04002221 struct extent_buffer *sb;
Chris Mason0b86a832008-03-24 15:01:56 -04002222 struct btrfs_disk_key *disk_key;
Chris Mason0b86a832008-03-24 15:01:56 -04002223 struct btrfs_chunk *chunk;
Chris Mason84eed902008-04-25 09:04:37 -04002224 u8 *ptr;
2225 unsigned long sb_ptr;
2226 int ret = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04002227 u32 num_stripes;
2228 u32 array_size;
2229 u32 len = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04002230 u32 cur;
Chris Mason84eed902008-04-25 09:04:37 -04002231 struct btrfs_key key;
Chris Mason0b86a832008-03-24 15:01:56 -04002232
Chris Masona061fc82008-05-07 11:43:44 -04002233 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
2234 BTRFS_SUPER_INFO_SIZE);
2235 if (!sb)
2236 return -ENOMEM;
2237 btrfs_set_buffer_uptodate(sb);
2238 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
Chris Mason0b86a832008-03-24 15:01:56 -04002239 array_size = btrfs_super_sys_array_size(super_copy);
2240
Chris Mason0b86a832008-03-24 15:01:56 -04002241 ptr = super_copy->sys_chunk_array;
2242 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
2243 cur = 0;
2244
2245 while (cur < array_size) {
2246 disk_key = (struct btrfs_disk_key *)ptr;
2247 btrfs_disk_key_to_cpu(&key, disk_key);
2248
Chris Masona061fc82008-05-07 11:43:44 -04002249 len = sizeof(*disk_key); ptr += len;
Chris Mason0b86a832008-03-24 15:01:56 -04002250 sb_ptr += len;
2251 cur += len;
2252
Chris Mason0d81ba52008-03-24 15:02:07 -04002253 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
Chris Mason0b86a832008-03-24 15:01:56 -04002254 chunk = (struct btrfs_chunk *)sb_ptr;
Chris Mason0d81ba52008-03-24 15:02:07 -04002255 ret = read_one_chunk(root, &key, sb, chunk);
Chris Mason84eed902008-04-25 09:04:37 -04002256 if (ret)
2257 break;
Chris Mason0b86a832008-03-24 15:01:56 -04002258 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
2259 len = btrfs_chunk_item_size(num_stripes);
2260 } else {
Chris Mason84eed902008-04-25 09:04:37 -04002261 ret = -EIO;
2262 break;
Chris Mason0b86a832008-03-24 15:01:56 -04002263 }
2264 ptr += len;
2265 sb_ptr += len;
2266 cur += len;
2267 }
Chris Masona061fc82008-05-07 11:43:44 -04002268 free_extent_buffer(sb);
Chris Mason84eed902008-04-25 09:04:37 -04002269 return ret;
Chris Mason0b86a832008-03-24 15:01:56 -04002270}
2271
2272int btrfs_read_chunk_tree(struct btrfs_root *root)
2273{
2274 struct btrfs_path *path;
2275 struct extent_buffer *leaf;
2276 struct btrfs_key key;
2277 struct btrfs_key found_key;
2278 int ret;
2279 int slot;
2280
2281 root = root->fs_info->chunk_root;
2282
2283 path = btrfs_alloc_path();
2284 if (!path)
2285 return -ENOMEM;
2286
2287 /* first we search for all of the device items, and then we
2288 * read in all of the chunk items. This way we can create chunk
2289 * mappings that reference all of the devices that are afound
2290 */
2291 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2292 key.offset = 0;
2293 key.type = 0;
2294again:
2295 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2296 while(1) {
2297 leaf = path->nodes[0];
2298 slot = path->slots[0];
2299 if (slot >= btrfs_header_nritems(leaf)) {
2300 ret = btrfs_next_leaf(root, path);
2301 if (ret == 0)
2302 continue;
2303 if (ret < 0)
2304 goto error;
2305 break;
2306 }
2307 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2308 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2309 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
2310 break;
2311 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
2312 struct btrfs_dev_item *dev_item;
2313 dev_item = btrfs_item_ptr(leaf, slot,
2314 struct btrfs_dev_item);
Chris Mason0d81ba52008-03-24 15:02:07 -04002315 ret = read_one_dev(root, leaf, dev_item);
Chris Mason0b86a832008-03-24 15:01:56 -04002316 BUG_ON(ret);
2317 }
2318 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
2319 struct btrfs_chunk *chunk;
2320 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2321 ret = read_one_chunk(root, &found_key, leaf, chunk);
2322 }
2323 path->slots[0]++;
2324 }
2325 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2326 key.objectid = 0;
2327 btrfs_release_path(root, path);
2328 goto again;
2329 }
2330
2331 btrfs_free_path(path);
2332 ret = 0;
2333error:
2334 return ret;
2335}
2336