blob: c3ee63f92a5f968518a0d1f7a10911baac14d819 [file] [log] [blame]
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
18#include <linux/sched.h>
19#include <linux/bio.h>
Chris Mason8a4b83c2008-03-24 15:02:07 -040020#include <linux/buffer_head.h>
Chris Masonf2d8d742008-04-21 10:03:05 -040021#include <linux/blkdev.h>
Chris Mason788f20e2008-04-28 15:29:42 -040022#include <linux/random.h>
Chris Mason593060d2008-03-25 16:50:33 -040023#include <asm/div64.h>
Chris Mason0b86a832008-03-24 15:01:56 -040024#include "ctree.h"
25#include "extent_map.h"
26#include "disk-io.h"
27#include "transaction.h"
28#include "print-tree.h"
29#include "volumes.h"
Chris Mason8b712842008-06-11 16:50:36 -040030#include "async-thread.h"
Chris Mason0b86a832008-03-24 15:01:56 -040031
Chris Mason593060d2008-03-25 16:50:33 -040032struct map_lookup {
33 u64 type;
34 int io_align;
35 int io_width;
36 int stripe_len;
37 int sector_size;
38 int num_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -040039 int sub_stripes;
Chris Masoncea9e442008-04-09 16:28:12 -040040 struct btrfs_bio_stripe stripes[];
Chris Mason593060d2008-03-25 16:50:33 -040041};
42
/* forward declarations; the definitions are presumably further down the file */
static int init_first_rw_device(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_device *device);
static int btrfs_relocate_sys_chunks(struct btrfs_root *root);

/* number of bytes needed for a struct map_lookup holding n stripes */
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
			    (sizeof(struct btrfs_bio_stripe) * (n)))

/* taken by btrfs_lock_volumes() and by the scan/open/close paths below */
static DEFINE_MUTEX(uuid_mutex);
/* every known struct btrfs_fs_devices, chained through its ->list member */
static LIST_HEAD(fs_uuids);
54
Chris Masona061fc82008-05-07 11:43:44 -040055void btrfs_lock_volumes(void)
56{
57 mutex_lock(&uuid_mutex);
58}
59
60void btrfs_unlock_volumes(void)
61{
62 mutex_unlock(&uuid_mutex);
63}
64
Chris Mason7d9eb122008-07-08 14:19:17 -040065static void lock_chunks(struct btrfs_root *root)
66{
Chris Mason7d9eb122008-07-08 14:19:17 -040067 mutex_lock(&root->fs_info->chunk_mutex);
68}
69
70static void unlock_chunks(struct btrfs_root *root)
71{
Chris Mason7d9eb122008-07-08 14:19:17 -040072 mutex_unlock(&root->fs_info->chunk_mutex);
73}
74
Chris Mason8a4b83c2008-03-24 15:02:07 -040075int btrfs_cleanup_fs_uuids(void)
76{
77 struct btrfs_fs_devices *fs_devices;
Chris Mason8a4b83c2008-03-24 15:02:07 -040078 struct btrfs_device *dev;
79
Yan Zheng2b820322008-11-17 21:11:30 -050080 while (!list_empty(&fs_uuids)) {
81 fs_devices = list_entry(fs_uuids.next,
82 struct btrfs_fs_devices, list);
83 list_del(&fs_devices->list);
Chris Mason8a4b83c2008-03-24 15:02:07 -040084 while(!list_empty(&fs_devices->devices)) {
Yan Zheng2b820322008-11-17 21:11:30 -050085 dev = list_entry(fs_devices->devices.next,
86 struct btrfs_device, dev_list);
Chris Mason8a4b83c2008-03-24 15:02:07 -040087 if (dev->bdev) {
Chris Mason15916de2008-11-19 21:17:22 -050088 close_bdev_exclusive(dev->bdev, dev->mode);
Chris Masona0af4692008-05-13 16:03:06 -040089 fs_devices->open_devices--;
Chris Mason8a4b83c2008-03-24 15:02:07 -040090 }
Yan Zheng2b820322008-11-17 21:11:30 -050091 fs_devices->num_devices--;
92 if (dev->writeable)
93 fs_devices->rw_devices--;
Chris Mason8a4b83c2008-03-24 15:02:07 -040094 list_del(&dev->dev_list);
Yan Zheng2b820322008-11-17 21:11:30 -050095 list_del(&dev->dev_alloc_list);
Chris Masondfe25022008-05-13 13:46:40 -040096 kfree(dev->name);
Chris Mason8a4b83c2008-03-24 15:02:07 -040097 kfree(dev);
98 }
Yan Zheng2b820322008-11-17 21:11:30 -050099 WARN_ON(fs_devices->num_devices);
100 WARN_ON(fs_devices->open_devices);
101 WARN_ON(fs_devices->rw_devices);
102 kfree(fs_devices);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400103 }
104 return 0;
105}
106
Chris Masona1b32a52008-09-05 16:09:51 -0400107static noinline struct btrfs_device *__find_device(struct list_head *head,
108 u64 devid, u8 *uuid)
Chris Mason8a4b83c2008-03-24 15:02:07 -0400109{
110 struct btrfs_device *dev;
111 struct list_head *cur;
112
113 list_for_each(cur, head) {
114 dev = list_entry(cur, struct btrfs_device, dev_list);
Chris Masona4437552008-04-18 10:29:38 -0400115 if (dev->devid == devid &&
Chris Mason8f18cf12008-04-25 16:53:30 -0400116 (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
Chris Mason8a4b83c2008-03-24 15:02:07 -0400117 return dev;
Chris Masona4437552008-04-18 10:29:38 -0400118 }
Chris Mason8a4b83c2008-03-24 15:02:07 -0400119 }
120 return NULL;
121}
122
Chris Masona1b32a52008-09-05 16:09:51 -0400123static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
Chris Mason8a4b83c2008-03-24 15:02:07 -0400124{
125 struct list_head *cur;
126 struct btrfs_fs_devices *fs_devices;
127
128 list_for_each(cur, &fs_uuids) {
129 fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
130 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
131 return fs_devices;
132 }
133 return NULL;
134}
135
Chris Mason8b712842008-06-11 16:50:36 -0400136/*
137 * we try to collect pending bios for a device so we don't get a large
138 * number of procs sending bios down to the same device. This greatly
139 * improves the schedulers ability to collect and merge the bios.
140 *
141 * But, it also turns into a long list of bios to process and that is sure
142 * to eventually make the worker thread block. The solution here is to
143 * make some progress and then put this work struct back at the end of
144 * the list if the block device is congested. This way, multiple devices
145 * can make progress from a single worker thread.
146 */
Chris Masona1b32a52008-09-05 16:09:51 -0400147static int noinline run_scheduled_bios(struct btrfs_device *device)
Chris Mason8b712842008-06-11 16:50:36 -0400148{
149 struct bio *pending;
150 struct backing_dev_info *bdi;
Chris Masonb64a2852008-08-20 13:39:41 -0400151 struct btrfs_fs_info *fs_info;
Chris Mason8b712842008-06-11 16:50:36 -0400152 struct bio *tail;
153 struct bio *cur;
154 int again = 0;
155 unsigned long num_run = 0;
Chris Masonb64a2852008-08-20 13:39:41 -0400156 unsigned long limit;
Chris Mason8b712842008-06-11 16:50:36 -0400157
158 bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
Chris Masonb64a2852008-08-20 13:39:41 -0400159 fs_info = device->dev_root->fs_info;
160 limit = btrfs_async_submit_limit(fs_info);
161 limit = limit * 2 / 3;
162
Chris Mason8b712842008-06-11 16:50:36 -0400163loop:
164 spin_lock(&device->io_lock);
165
166 /* take all the bios off the list at once and process them
167 * later on (without the lock held). But, remember the
168 * tail and other pointers so the bios can be properly reinserted
169 * into the list if we hit congestion
170 */
171 pending = device->pending_bios;
172 tail = device->pending_bio_tail;
173 WARN_ON(pending && !tail);
174 device->pending_bios = NULL;
175 device->pending_bio_tail = NULL;
176
177 /*
178 * if pending was null this time around, no bios need processing
179 * at all and we can stop. Otherwise it'll loop back up again
180 * and do an additional check so no bios are missed.
181 *
182 * device->running_pending is used to synchronize with the
183 * schedule_bio code.
184 */
185 if (pending) {
186 again = 1;
187 device->running_pending = 1;
188 } else {
189 again = 0;
190 device->running_pending = 0;
191 }
192 spin_unlock(&device->io_lock);
193
194 while(pending) {
195 cur = pending;
196 pending = pending->bi_next;
197 cur->bi_next = NULL;
Chris Masonb64a2852008-08-20 13:39:41 -0400198 atomic_dec(&fs_info->nr_async_bios);
199
200 if (atomic_read(&fs_info->nr_async_bios) < limit &&
201 waitqueue_active(&fs_info->async_submit_wait))
202 wake_up(&fs_info->async_submit_wait);
Chris Mason492bb6de2008-07-31 16:29:02 -0400203
204 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
205 bio_get(cur);
Chris Mason8b712842008-06-11 16:50:36 -0400206 submit_bio(cur->bi_rw, cur);
Chris Mason492bb6de2008-07-31 16:29:02 -0400207 bio_put(cur);
Chris Mason8b712842008-06-11 16:50:36 -0400208 num_run++;
209
210 /*
211 * we made progress, there is more work to do and the bdi
212 * is now congested. Back off and let other work structs
213 * run instead
214 */
Chris Mason5f2cc082008-11-07 18:22:45 -0500215 if (pending && bdi_write_congested(bdi) &&
216 fs_info->fs_devices->open_devices > 1) {
Chris Mason8b712842008-06-11 16:50:36 -0400217 struct bio *old_head;
218
219 spin_lock(&device->io_lock);
Chris Mason492bb6de2008-07-31 16:29:02 -0400220
Chris Mason8b712842008-06-11 16:50:36 -0400221 old_head = device->pending_bios;
222 device->pending_bios = pending;
223 if (device->pending_bio_tail)
224 tail->bi_next = old_head;
225 else
226 device->pending_bio_tail = tail;
227
228 spin_unlock(&device->io_lock);
229 btrfs_requeue_work(&device->work);
230 goto done;
231 }
232 }
233 if (again)
234 goto loop;
235done:
236 return 0;
237}
238
239void pending_bios_fn(struct btrfs_work *work)
240{
241 struct btrfs_device *device;
242
243 device = container_of(work, struct btrfs_device, work);
244 run_scheduled_bios(device);
245}
246
Chris Masona1b32a52008-09-05 16:09:51 -0400247static noinline int device_list_add(const char *path,
Chris Mason8a4b83c2008-03-24 15:02:07 -0400248 struct btrfs_super_block *disk_super,
249 u64 devid, struct btrfs_fs_devices **fs_devices_ret)
250{
251 struct btrfs_device *device;
252 struct btrfs_fs_devices *fs_devices;
253 u64 found_transid = btrfs_super_generation(disk_super);
254
255 fs_devices = find_fsid(disk_super->fsid);
256 if (!fs_devices) {
Chris Mason515dc322008-05-16 13:30:15 -0400257 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400258 if (!fs_devices)
259 return -ENOMEM;
260 INIT_LIST_HEAD(&fs_devices->devices);
Chris Masonb3075712008-04-22 09:22:07 -0400261 INIT_LIST_HEAD(&fs_devices->alloc_list);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400262 list_add(&fs_devices->list, &fs_uuids);
263 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
264 fs_devices->latest_devid = devid;
265 fs_devices->latest_trans = found_transid;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400266 device = NULL;
267 } else {
Chris Masona4437552008-04-18 10:29:38 -0400268 device = __find_device(&fs_devices->devices, devid,
269 disk_super->dev_item.uuid);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400270 }
271 if (!device) {
Yan Zheng2b820322008-11-17 21:11:30 -0500272 if (fs_devices->opened)
273 return -EBUSY;
274
Chris Mason8a4b83c2008-03-24 15:02:07 -0400275 device = kzalloc(sizeof(*device), GFP_NOFS);
276 if (!device) {
277 /* we can safely leave the fs_devices entry around */
278 return -ENOMEM;
279 }
280 device->devid = devid;
Chris Mason8b712842008-06-11 16:50:36 -0400281 device->work.func = pending_bios_fn;
Chris Masona4437552008-04-18 10:29:38 -0400282 memcpy(device->uuid, disk_super->dev_item.uuid,
283 BTRFS_UUID_SIZE);
Chris Masonf2984462008-04-10 16:19:33 -0400284 device->barriers = 1;
Chris Masonb248a412008-04-14 09:48:18 -0400285 spin_lock_init(&device->io_lock);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400286 device->name = kstrdup(path, GFP_NOFS);
287 if (!device->name) {
288 kfree(device);
289 return -ENOMEM;
290 }
Yan Zheng2b820322008-11-17 21:11:30 -0500291 INIT_LIST_HEAD(&device->dev_alloc_list);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400292 list_add(&device->dev_list, &fs_devices->devices);
Yan Zheng2b820322008-11-17 21:11:30 -0500293 device->fs_devices = fs_devices;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400294 fs_devices->num_devices++;
295 }
296
297 if (found_transid > fs_devices->latest_trans) {
298 fs_devices->latest_devid = devid;
299 fs_devices->latest_trans = found_transid;
300 }
Chris Mason8a4b83c2008-03-24 15:02:07 -0400301 *fs_devices_ret = fs_devices;
302 return 0;
303}
304
Chris Masondfe25022008-05-13 13:46:40 -0400305int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
306{
Yan Zheng2b820322008-11-17 21:11:30 -0500307 struct list_head *tmp;
Chris Masondfe25022008-05-13 13:46:40 -0400308 struct list_head *cur;
309 struct btrfs_device *device;
Yan Zheng2b820322008-11-17 21:11:30 -0500310 int seed_devices = 0;
Chris Masondfe25022008-05-13 13:46:40 -0400311
312 mutex_lock(&uuid_mutex);
313again:
Yan Zheng2b820322008-11-17 21:11:30 -0500314 list_for_each_safe(cur, tmp, &fs_devices->devices) {
Chris Masondfe25022008-05-13 13:46:40 -0400315 device = list_entry(cur, struct btrfs_device, dev_list);
Yan Zheng2b820322008-11-17 21:11:30 -0500316 if (device->in_fs_metadata)
317 continue;
318
319 if (device->bdev) {
Chris Mason15916de2008-11-19 21:17:22 -0500320 close_bdev_exclusive(device->bdev, device->mode);
Yan Zheng2b820322008-11-17 21:11:30 -0500321 device->bdev = NULL;
322 fs_devices->open_devices--;
323 }
324 if (device->writeable) {
325 list_del_init(&device->dev_alloc_list);
326 device->writeable = 0;
327 fs_devices->rw_devices--;
328 }
329 if (!seed_devices) {
330 list_del_init(&device->dev_list);
Chris Masondfe25022008-05-13 13:46:40 -0400331 fs_devices->num_devices--;
332 kfree(device->name);
333 kfree(device);
Chris Masondfe25022008-05-13 13:46:40 -0400334 }
335 }
Yan Zheng2b820322008-11-17 21:11:30 -0500336
337 if (fs_devices->seed) {
338 fs_devices = fs_devices->seed;
339 seed_devices = 1;
340 goto again;
341 }
342
Chris Masondfe25022008-05-13 13:46:40 -0400343 mutex_unlock(&uuid_mutex);
344 return 0;
345}
Chris Masona0af4692008-05-13 16:03:06 -0400346
Yan Zheng2b820322008-11-17 21:11:30 -0500347static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
Chris Mason8a4b83c2008-03-24 15:02:07 -0400348{
Yan Zheng2b820322008-11-17 21:11:30 -0500349 struct btrfs_fs_devices *seed_devices;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400350 struct list_head *cur;
351 struct btrfs_device *device;
Yan Zheng2b820322008-11-17 21:11:30 -0500352again:
353 if (--fs_devices->opened > 0)
354 return 0;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400355
Yan Zheng2b820322008-11-17 21:11:30 -0500356 list_for_each(cur, &fs_devices->devices) {
Chris Mason8a4b83c2008-03-24 15:02:07 -0400357 device = list_entry(cur, struct btrfs_device, dev_list);
358 if (device->bdev) {
Chris Mason15916de2008-11-19 21:17:22 -0500359 close_bdev_exclusive(device->bdev, device->mode);
Chris Masona0af4692008-05-13 16:03:06 -0400360 fs_devices->open_devices--;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400361 }
Yan Zheng2b820322008-11-17 21:11:30 -0500362 if (device->writeable) {
363 list_del_init(&device->dev_alloc_list);
364 fs_devices->rw_devices--;
365 }
366
Chris Mason8a4b83c2008-03-24 15:02:07 -0400367 device->bdev = NULL;
Yan Zheng2b820322008-11-17 21:11:30 -0500368 device->writeable = 0;
Chris Masondfe25022008-05-13 13:46:40 -0400369 device->in_fs_metadata = 0;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400370 }
Yan Zheng2b820322008-11-17 21:11:30 -0500371 fs_devices->opened = 0;
372 fs_devices->seeding = 0;
373 fs_devices->sprouted = 0;
374
375 seed_devices = fs_devices->seed;
376 fs_devices->seed = NULL;
377 if (seed_devices) {
378 fs_devices = seed_devices;
379 goto again;
380 }
Chris Mason8a4b83c2008-03-24 15:02:07 -0400381 return 0;
382}
383
Yan Zheng2b820322008-11-17 21:11:30 -0500384int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
385{
386 int ret;
387
388 mutex_lock(&uuid_mutex);
389 ret = __btrfs_close_devices(fs_devices);
390 mutex_unlock(&uuid_mutex);
391 return ret;
392}
393
Chris Mason15916de2008-11-19 21:17:22 -0500394int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
395 int flags, void *holder)
Chris Mason8a4b83c2008-03-24 15:02:07 -0400396{
397 struct block_device *bdev;
398 struct list_head *head = &fs_devices->devices;
399 struct list_head *cur;
400 struct btrfs_device *device;
Chris Masona0af4692008-05-13 16:03:06 -0400401 struct block_device *latest_bdev = NULL;
402 struct buffer_head *bh;
403 struct btrfs_super_block *disk_super;
404 u64 latest_devid = 0;
405 u64 latest_transid = 0;
Chris Masona0af4692008-05-13 16:03:06 -0400406 u64 devid;
Yan Zheng2b820322008-11-17 21:11:30 -0500407 int seeding = 1;
Chris Masona0af4692008-05-13 16:03:06 -0400408 int ret = 0;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400409
Chris Mason8a4b83c2008-03-24 15:02:07 -0400410 list_for_each(cur, head) {
411 device = list_entry(cur, struct btrfs_device, dev_list);
Chris Masonc1c4d912008-05-08 15:05:58 -0400412 if (device->bdev)
413 continue;
Chris Masondfe25022008-05-13 13:46:40 -0400414 if (!device->name)
415 continue;
416
Chris Mason15916de2008-11-19 21:17:22 -0500417 bdev = open_bdev_exclusive(device->name, flags, holder);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400418 if (IS_ERR(bdev)) {
419 printk("open %s failed\n", device->name);
Chris Masona0af4692008-05-13 16:03:06 -0400420 goto error;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400421 }
Chris Masona061fc82008-05-07 11:43:44 -0400422 set_blocksize(bdev, 4096);
Chris Masona0af4692008-05-13 16:03:06 -0400423
424 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
425 if (!bh)
426 goto error_close;
427
428 disk_super = (struct btrfs_super_block *)bh->b_data;
429 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
430 sizeof(disk_super->magic)))
431 goto error_brelse;
432
433 devid = le64_to_cpu(disk_super->dev_item.devid);
434 if (devid != device->devid)
435 goto error_brelse;
436
Yan Zheng2b820322008-11-17 21:11:30 -0500437 if (memcmp(device->uuid, disk_super->dev_item.uuid,
438 BTRFS_UUID_SIZE))
439 goto error_brelse;
440
441 device->generation = btrfs_super_generation(disk_super);
442 if (!latest_transid || device->generation > latest_transid) {
Chris Masona0af4692008-05-13 16:03:06 -0400443 latest_devid = devid;
Yan Zheng2b820322008-11-17 21:11:30 -0500444 latest_transid = device->generation;
Chris Masona0af4692008-05-13 16:03:06 -0400445 latest_bdev = bdev;
446 }
447
Yan Zheng2b820322008-11-17 21:11:30 -0500448 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
449 device->writeable = 0;
450 } else {
451 device->writeable = !bdev_read_only(bdev);
452 seeding = 0;
453 }
454
Chris Mason8a4b83c2008-03-24 15:02:07 -0400455 device->bdev = bdev;
Chris Masondfe25022008-05-13 13:46:40 -0400456 device->in_fs_metadata = 0;
Chris Mason15916de2008-11-19 21:17:22 -0500457 device->mode = flags;
458
Chris Masona0af4692008-05-13 16:03:06 -0400459 fs_devices->open_devices++;
Yan Zheng2b820322008-11-17 21:11:30 -0500460 if (device->writeable) {
461 fs_devices->rw_devices++;
462 list_add(&device->dev_alloc_list,
463 &fs_devices->alloc_list);
464 }
Chris Masona0af4692008-05-13 16:03:06 -0400465 continue;
Chris Masona061fc82008-05-07 11:43:44 -0400466
Chris Masona0af4692008-05-13 16:03:06 -0400467error_brelse:
468 brelse(bh);
469error_close:
Chris Mason15916de2008-11-19 21:17:22 -0500470 close_bdev_exclusive(bdev, MS_RDONLY);
Chris Masona0af4692008-05-13 16:03:06 -0400471error:
472 continue;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400473 }
Chris Masona0af4692008-05-13 16:03:06 -0400474 if (fs_devices->open_devices == 0) {
475 ret = -EIO;
476 goto out;
477 }
Yan Zheng2b820322008-11-17 21:11:30 -0500478 fs_devices->seeding = seeding;
479 fs_devices->opened = 1;
Chris Masona0af4692008-05-13 16:03:06 -0400480 fs_devices->latest_bdev = latest_bdev;
481 fs_devices->latest_devid = latest_devid;
482 fs_devices->latest_trans = latest_transid;
Yan Zheng2b820322008-11-17 21:11:30 -0500483 fs_devices->total_rw_bytes = 0;
Chris Masona0af4692008-05-13 16:03:06 -0400484out:
Yan Zheng2b820322008-11-17 21:11:30 -0500485 return ret;
486}
487
488int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
489 int flags, void *holder)
490{
491 int ret;
492
493 mutex_lock(&uuid_mutex);
494 if (fs_devices->opened) {
495 if (fs_devices->sprouted) {
496 ret = -EBUSY;
497 } else {
498 fs_devices->opened++;
499 ret = 0;
500 }
501 } else {
Chris Mason15916de2008-11-19 21:17:22 -0500502 ret = __btrfs_open_devices(fs_devices, flags, holder);
Yan Zheng2b820322008-11-17 21:11:30 -0500503 }
Chris Mason8a4b83c2008-03-24 15:02:07 -0400504 mutex_unlock(&uuid_mutex);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400505 return ret;
506}
507
508int btrfs_scan_one_device(const char *path, int flags, void *holder,
509 struct btrfs_fs_devices **fs_devices_ret)
510{
511 struct btrfs_super_block *disk_super;
512 struct block_device *bdev;
513 struct buffer_head *bh;
514 int ret;
515 u64 devid;
Chris Masonf2984462008-04-10 16:19:33 -0400516 u64 transid;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400517
518 mutex_lock(&uuid_mutex);
519
Chris Mason15916de2008-11-19 21:17:22 -0500520 bdev = open_bdev_exclusive(path, flags, holder);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400521
522 if (IS_ERR(bdev)) {
Chris Mason8a4b83c2008-03-24 15:02:07 -0400523 ret = PTR_ERR(bdev);
524 goto error;
525 }
526
527 ret = set_blocksize(bdev, 4096);
528 if (ret)
529 goto error_close;
530 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
531 if (!bh) {
532 ret = -EIO;
533 goto error_close;
534 }
535 disk_super = (struct btrfs_super_block *)bh->b_data;
536 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
537 sizeof(disk_super->magic))) {
Yane58ca022008-04-01 11:21:34 -0400538 ret = -EINVAL;
Chris Mason8a4b83c2008-03-24 15:02:07 -0400539 goto error_brelse;
540 }
541 devid = le64_to_cpu(disk_super->dev_item.devid);
Chris Masonf2984462008-04-10 16:19:33 -0400542 transid = btrfs_super_generation(disk_super);
Chris Mason7ae9c092008-04-18 10:29:49 -0400543 if (disk_super->label[0])
544 printk("device label %s ", disk_super->label);
545 else {
546 /* FIXME, make a readl uuid parser */
547 printk("device fsid %llx-%llx ",
548 *(unsigned long long *)disk_super->fsid,
549 *(unsigned long long *)(disk_super->fsid + 8));
550 }
551 printk("devid %Lu transid %Lu %s\n", devid, transid, path);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400552 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
553
554error_brelse:
555 brelse(bh);
556error_close:
Chris Mason15916de2008-11-19 21:17:22 -0500557 close_bdev_exclusive(bdev, flags);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400558error:
559 mutex_unlock(&uuid_mutex);
560 return ret;
561}
Chris Mason0b86a832008-03-24 15:01:56 -0400562
563/*
564 * this uses a pretty simple search, the expectation is that it is
565 * called very infrequently and that a given device has a small number
566 * of extents
567 */
Chris Masona1b32a52008-09-05 16:09:51 -0400568static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
569 struct btrfs_device *device,
Chris Masona1b32a52008-09-05 16:09:51 -0400570 u64 num_bytes, u64 *start)
Chris Mason0b86a832008-03-24 15:01:56 -0400571{
572 struct btrfs_key key;
573 struct btrfs_root *root = device->dev_root;
574 struct btrfs_dev_extent *dev_extent = NULL;
Yan Zheng2b820322008-11-17 21:11:30 -0500575 struct btrfs_path *path;
Chris Mason0b86a832008-03-24 15:01:56 -0400576 u64 hole_size = 0;
577 u64 last_byte = 0;
578 u64 search_start = 0;
579 u64 search_end = device->total_bytes;
580 int ret;
581 int slot = 0;
582 int start_found;
583 struct extent_buffer *l;
584
Yan Zheng2b820322008-11-17 21:11:30 -0500585 path = btrfs_alloc_path();
586 if (!path)
587 return -ENOMEM;
Chris Mason0b86a832008-03-24 15:01:56 -0400588 path->reada = 2;
Yan Zheng2b820322008-11-17 21:11:30 -0500589 start_found = 0;
Chris Mason0b86a832008-03-24 15:01:56 -0400590
591 /* FIXME use last free of some kind */
592
Chris Mason8a4b83c2008-03-24 15:02:07 -0400593 /* we don't want to overwrite the superblock on the drive,
594 * so we make sure to start at an offset of at least 1MB
595 */
596 search_start = max((u64)1024 * 1024, search_start);
Chris Mason8f18cf12008-04-25 16:53:30 -0400597
598 if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
599 search_start = max(root->fs_info->alloc_start, search_start);
600
Chris Mason0b86a832008-03-24 15:01:56 -0400601 key.objectid = device->devid;
602 key.offset = search_start;
603 key.type = BTRFS_DEV_EXTENT_KEY;
604 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
605 if (ret < 0)
606 goto error;
607 ret = btrfs_previous_item(root, path, 0, key.type);
608 if (ret < 0)
609 goto error;
610 l = path->nodes[0];
611 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
612 while (1) {
613 l = path->nodes[0];
614 slot = path->slots[0];
615 if (slot >= btrfs_header_nritems(l)) {
616 ret = btrfs_next_leaf(root, path);
617 if (ret == 0)
618 continue;
619 if (ret < 0)
620 goto error;
621no_more_items:
622 if (!start_found) {
623 if (search_start >= search_end) {
624 ret = -ENOSPC;
625 goto error;
626 }
627 *start = search_start;
628 start_found = 1;
629 goto check_pending;
630 }
631 *start = last_byte > search_start ?
632 last_byte : search_start;
633 if (search_end <= *start) {
634 ret = -ENOSPC;
635 goto error;
636 }
637 goto check_pending;
638 }
639 btrfs_item_key_to_cpu(l, &key, slot);
640
641 if (key.objectid < device->devid)
642 goto next;
643
644 if (key.objectid > device->devid)
645 goto no_more_items;
646
647 if (key.offset >= search_start && key.offset > last_byte &&
648 start_found) {
649 if (last_byte < search_start)
650 last_byte = search_start;
651 hole_size = key.offset - last_byte;
652 if (key.offset > last_byte &&
653 hole_size >= num_bytes) {
654 *start = last_byte;
655 goto check_pending;
656 }
657 }
658 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
659 goto next;
660 }
661
662 start_found = 1;
663 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
664 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
665next:
666 path->slots[0]++;
667 cond_resched();
668 }
669check_pending:
670 /* we have to make sure we didn't find an extent that has already
671 * been allocated by the map tree or the original allocation
672 */
Chris Mason0b86a832008-03-24 15:01:56 -0400673 BUG_ON(*start < search_start);
674
Chris Mason6324fbf2008-03-24 15:01:59 -0400675 if (*start + num_bytes > search_end) {
Chris Mason0b86a832008-03-24 15:01:56 -0400676 ret = -ENOSPC;
677 goto error;
678 }
679 /* check for pending inserts here */
Yan Zheng2b820322008-11-17 21:11:30 -0500680 ret = 0;
Chris Mason0b86a832008-03-24 15:01:56 -0400681
682error:
Yan Zheng2b820322008-11-17 21:11:30 -0500683 btrfs_free_path(path);
Chris Mason0b86a832008-03-24 15:01:56 -0400684 return ret;
685}
686
Chris Mason8f18cf12008-04-25 16:53:30 -0400687int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
688 struct btrfs_device *device,
689 u64 start)
690{
691 int ret;
692 struct btrfs_path *path;
693 struct btrfs_root *root = device->dev_root;
694 struct btrfs_key key;
Chris Masona061fc82008-05-07 11:43:44 -0400695 struct btrfs_key found_key;
696 struct extent_buffer *leaf = NULL;
697 struct btrfs_dev_extent *extent = NULL;
Chris Mason8f18cf12008-04-25 16:53:30 -0400698
699 path = btrfs_alloc_path();
700 if (!path)
701 return -ENOMEM;
702
703 key.objectid = device->devid;
704 key.offset = start;
705 key.type = BTRFS_DEV_EXTENT_KEY;
706
707 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
Chris Masona061fc82008-05-07 11:43:44 -0400708 if (ret > 0) {
709 ret = btrfs_previous_item(root, path, key.objectid,
710 BTRFS_DEV_EXTENT_KEY);
711 BUG_ON(ret);
712 leaf = path->nodes[0];
713 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
714 extent = btrfs_item_ptr(leaf, path->slots[0],
715 struct btrfs_dev_extent);
716 BUG_ON(found_key.offset > start || found_key.offset +
717 btrfs_dev_extent_length(leaf, extent) < start);
718 ret = 0;
719 } else if (ret == 0) {
720 leaf = path->nodes[0];
721 extent = btrfs_item_ptr(leaf, path->slots[0],
722 struct btrfs_dev_extent);
723 }
Chris Mason8f18cf12008-04-25 16:53:30 -0400724 BUG_ON(ret);
725
Chris Masondfe25022008-05-13 13:46:40 -0400726 if (device->bytes_used > 0)
727 device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
Chris Mason8f18cf12008-04-25 16:53:30 -0400728 ret = btrfs_del_item(trans, root, path);
729 BUG_ON(ret);
730
731 btrfs_free_path(path);
732 return ret;
733}
734
Yan Zheng2b820322008-11-17 21:11:30 -0500735int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
Chris Mason0b86a832008-03-24 15:01:56 -0400736 struct btrfs_device *device,
Chris Masone17cade2008-04-15 15:41:47 -0400737 u64 chunk_tree, u64 chunk_objectid,
Yan Zheng2b820322008-11-17 21:11:30 -0500738 u64 chunk_offset, u64 start, u64 num_bytes)
Chris Mason0b86a832008-03-24 15:01:56 -0400739{
740 int ret;
741 struct btrfs_path *path;
742 struct btrfs_root *root = device->dev_root;
743 struct btrfs_dev_extent *extent;
744 struct extent_buffer *leaf;
745 struct btrfs_key key;
746
Chris Masondfe25022008-05-13 13:46:40 -0400747 WARN_ON(!device->in_fs_metadata);
Chris Mason0b86a832008-03-24 15:01:56 -0400748 path = btrfs_alloc_path();
749 if (!path)
750 return -ENOMEM;
751
Chris Mason0b86a832008-03-24 15:01:56 -0400752 key.objectid = device->devid;
Yan Zheng2b820322008-11-17 21:11:30 -0500753 key.offset = start;
Chris Mason0b86a832008-03-24 15:01:56 -0400754 key.type = BTRFS_DEV_EXTENT_KEY;
755 ret = btrfs_insert_empty_item(trans, root, path, &key,
756 sizeof(*extent));
757 BUG_ON(ret);
758
759 leaf = path->nodes[0];
760 extent = btrfs_item_ptr(leaf, path->slots[0],
761 struct btrfs_dev_extent);
Chris Masone17cade2008-04-15 15:41:47 -0400762 btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
763 btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
764 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
765
766 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
767 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
768 BTRFS_UUID_SIZE);
769
Chris Mason0b86a832008-03-24 15:01:56 -0400770 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
771 btrfs_mark_buffer_dirty(leaf);
Chris Mason0b86a832008-03-24 15:01:56 -0400772 btrfs_free_path(path);
773 return ret;
774}
775
Chris Masona1b32a52008-09-05 16:09:51 -0400776static noinline int find_next_chunk(struct btrfs_root *root,
777 u64 objectid, u64 *offset)
Chris Mason0b86a832008-03-24 15:01:56 -0400778{
779 struct btrfs_path *path;
780 int ret;
781 struct btrfs_key key;
Chris Masone17cade2008-04-15 15:41:47 -0400782 struct btrfs_chunk *chunk;
Chris Mason0b86a832008-03-24 15:01:56 -0400783 struct btrfs_key found_key;
784
785 path = btrfs_alloc_path();
786 BUG_ON(!path);
787
Chris Masone17cade2008-04-15 15:41:47 -0400788 key.objectid = objectid;
Chris Mason0b86a832008-03-24 15:01:56 -0400789 key.offset = (u64)-1;
790 key.type = BTRFS_CHUNK_ITEM_KEY;
791
792 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
793 if (ret < 0)
794 goto error;
795
796 BUG_ON(ret == 0);
797
798 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
799 if (ret) {
Chris Masone17cade2008-04-15 15:41:47 -0400800 *offset = 0;
Chris Mason0b86a832008-03-24 15:01:56 -0400801 } else {
802 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
803 path->slots[0]);
Chris Masone17cade2008-04-15 15:41:47 -0400804 if (found_key.objectid != objectid)
805 *offset = 0;
806 else {
807 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
808 struct btrfs_chunk);
809 *offset = found_key.offset +
810 btrfs_chunk_length(path->nodes[0], chunk);
811 }
Chris Mason0b86a832008-03-24 15:01:56 -0400812 }
813 ret = 0;
814error:
815 btrfs_free_path(path);
816 return ret;
817}
818
Yan Zheng2b820322008-11-17 21:11:30 -0500819static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
Chris Mason0b86a832008-03-24 15:01:56 -0400820{
821 int ret;
822 struct btrfs_key key;
823 struct btrfs_key found_key;
Yan Zheng2b820322008-11-17 21:11:30 -0500824 struct btrfs_path *path;
825
826 root = root->fs_info->chunk_root;
827
828 path = btrfs_alloc_path();
829 if (!path)
830 return -ENOMEM;
Chris Mason0b86a832008-03-24 15:01:56 -0400831
832 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
833 key.type = BTRFS_DEV_ITEM_KEY;
834 key.offset = (u64)-1;
835
836 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
837 if (ret < 0)
838 goto error;
839
840 BUG_ON(ret == 0);
841
842 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
843 BTRFS_DEV_ITEM_KEY);
844 if (ret) {
845 *objectid = 1;
846 } else {
847 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
848 path->slots[0]);
849 *objectid = found_key.offset + 1;
850 }
851 ret = 0;
852error:
Yan Zheng2b820322008-11-17 21:11:30 -0500853 btrfs_free_path(path);
Chris Mason0b86a832008-03-24 15:01:56 -0400854 return ret;
855}
856
857/*
858 * the device information is stored in the chunk root
859 * the btrfs_device struct should be fully filled in
860 */
861int btrfs_add_device(struct btrfs_trans_handle *trans,
862 struct btrfs_root *root,
863 struct btrfs_device *device)
864{
865 int ret;
866 struct btrfs_path *path;
867 struct btrfs_dev_item *dev_item;
868 struct extent_buffer *leaf;
869 struct btrfs_key key;
870 unsigned long ptr;
Chris Mason0b86a832008-03-24 15:01:56 -0400871
872 root = root->fs_info->chunk_root;
873
874 path = btrfs_alloc_path();
875 if (!path)
876 return -ENOMEM;
877
Chris Mason0b86a832008-03-24 15:01:56 -0400878 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
879 key.type = BTRFS_DEV_ITEM_KEY;
Yan Zheng2b820322008-11-17 21:11:30 -0500880 key.offset = device->devid;
Chris Mason0b86a832008-03-24 15:01:56 -0400881
882 ret = btrfs_insert_empty_item(trans, root, path, &key,
Chris Mason0d81ba52008-03-24 15:02:07 -0400883 sizeof(*dev_item));
Chris Mason0b86a832008-03-24 15:01:56 -0400884 if (ret)
885 goto out;
886
887 leaf = path->nodes[0];
888 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
889
890 btrfs_set_device_id(leaf, dev_item, device->devid);
Yan Zheng2b820322008-11-17 21:11:30 -0500891 btrfs_set_device_generation(leaf, dev_item, 0);
Chris Mason0b86a832008-03-24 15:01:56 -0400892 btrfs_set_device_type(leaf, dev_item, device->type);
893 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
894 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
895 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
Chris Mason0b86a832008-03-24 15:01:56 -0400896 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
897 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
Chris Masone17cade2008-04-15 15:41:47 -0400898 btrfs_set_device_group(leaf, dev_item, 0);
899 btrfs_set_device_seek_speed(leaf, dev_item, 0);
900 btrfs_set_device_bandwidth(leaf, dev_item, 0);
Chris Mason0b86a832008-03-24 15:01:56 -0400901
Chris Mason0b86a832008-03-24 15:01:56 -0400902 ptr = (unsigned long)btrfs_device_uuid(dev_item);
Chris Masone17cade2008-04-15 15:41:47 -0400903 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
Yan Zheng2b820322008-11-17 21:11:30 -0500904 ptr = (unsigned long)btrfs_device_fsid(dev_item);
905 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
Chris Mason0b86a832008-03-24 15:01:56 -0400906 btrfs_mark_buffer_dirty(leaf);
Chris Mason0b86a832008-03-24 15:01:56 -0400907
Yan Zheng2b820322008-11-17 21:11:30 -0500908 ret = 0;
Chris Mason0b86a832008-03-24 15:01:56 -0400909out:
910 btrfs_free_path(path);
911 return ret;
912}
Chris Mason8f18cf12008-04-25 16:53:30 -0400913
Chris Masona061fc82008-05-07 11:43:44 -0400914static int btrfs_rm_dev_item(struct btrfs_root *root,
915 struct btrfs_device *device)
916{
917 int ret;
918 struct btrfs_path *path;
Chris Masona061fc82008-05-07 11:43:44 -0400919 struct btrfs_key key;
Chris Masona061fc82008-05-07 11:43:44 -0400920 struct btrfs_trans_handle *trans;
921
922 root = root->fs_info->chunk_root;
923
924 path = btrfs_alloc_path();
925 if (!path)
926 return -ENOMEM;
927
928 trans = btrfs_start_transaction(root, 1);
929 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
930 key.type = BTRFS_DEV_ITEM_KEY;
931 key.offset = device->devid;
Chris Mason7d9eb122008-07-08 14:19:17 -0400932 lock_chunks(root);
Chris Masona061fc82008-05-07 11:43:44 -0400933
934 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
935 if (ret < 0)
936 goto out;
937
938 if (ret > 0) {
939 ret = -ENOENT;
940 goto out;
941 }
942
943 ret = btrfs_del_item(trans, root, path);
944 if (ret)
945 goto out;
Chris Masona061fc82008-05-07 11:43:44 -0400946out:
947 btrfs_free_path(path);
Chris Mason7d9eb122008-07-08 14:19:17 -0400948 unlock_chunks(root);
Chris Masona061fc82008-05-07 11:43:44 -0400949 btrfs_commit_transaction(trans, root);
950 return ret;
951}
952
953int btrfs_rm_device(struct btrfs_root *root, char *device_path)
954{
955 struct btrfs_device *device;
Yan Zheng2b820322008-11-17 21:11:30 -0500956 struct btrfs_device *next_device;
Chris Masona061fc82008-05-07 11:43:44 -0400957 struct block_device *bdev;
Chris Masondfe25022008-05-13 13:46:40 -0400958 struct buffer_head *bh = NULL;
Chris Masona061fc82008-05-07 11:43:44 -0400959 struct btrfs_super_block *disk_super;
960 u64 all_avail;
961 u64 devid;
Yan Zheng2b820322008-11-17 21:11:30 -0500962 u64 num_devices;
963 u8 *dev_uuid;
Chris Masona061fc82008-05-07 11:43:44 -0400964 int ret = 0;
965
Chris Masona061fc82008-05-07 11:43:44 -0400966 mutex_lock(&uuid_mutex);
Chris Mason7d9eb122008-07-08 14:19:17 -0400967 mutex_lock(&root->fs_info->volume_mutex);
Chris Masona061fc82008-05-07 11:43:44 -0400968
969 all_avail = root->fs_info->avail_data_alloc_bits |
970 root->fs_info->avail_system_alloc_bits |
971 root->fs_info->avail_metadata_alloc_bits;
972
973 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
Yan Zheng2b820322008-11-17 21:11:30 -0500974 root->fs_info->fs_devices->rw_devices <= 4) {
Chris Masona061fc82008-05-07 11:43:44 -0400975 printk("btrfs: unable to go below four devices on raid10\n");
976 ret = -EINVAL;
977 goto out;
978 }
979
980 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
Yan Zheng2b820322008-11-17 21:11:30 -0500981 root->fs_info->fs_devices->rw_devices <= 2) {
Chris Masona061fc82008-05-07 11:43:44 -0400982 printk("btrfs: unable to go below two devices on raid1\n");
983 ret = -EINVAL;
984 goto out;
985 }
986
Chris Masondfe25022008-05-13 13:46:40 -0400987 if (strcmp(device_path, "missing") == 0) {
988 struct list_head *cur;
989 struct list_head *devices;
990 struct btrfs_device *tmp;
Chris Masona061fc82008-05-07 11:43:44 -0400991
Chris Masondfe25022008-05-13 13:46:40 -0400992 device = NULL;
993 devices = &root->fs_info->fs_devices->devices;
994 list_for_each(cur, devices) {
995 tmp = list_entry(cur, struct btrfs_device, dev_list);
996 if (tmp->in_fs_metadata && !tmp->bdev) {
997 device = tmp;
998 break;
999 }
1000 }
1001 bdev = NULL;
1002 bh = NULL;
1003 disk_super = NULL;
1004 if (!device) {
1005 printk("btrfs: no missing devices found to remove\n");
1006 goto out;
1007 }
Chris Masondfe25022008-05-13 13:46:40 -04001008 } else {
Chris Mason15916de2008-11-19 21:17:22 -05001009 bdev = open_bdev_exclusive(device_path, MS_RDONLY,
Chris Masondfe25022008-05-13 13:46:40 -04001010 root->fs_info->bdev_holder);
1011 if (IS_ERR(bdev)) {
1012 ret = PTR_ERR(bdev);
1013 goto out;
1014 }
1015
Yan Zheng2b820322008-11-17 21:11:30 -05001016 set_blocksize(bdev, 4096);
Chris Masondfe25022008-05-13 13:46:40 -04001017 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
1018 if (!bh) {
1019 ret = -EIO;
1020 goto error_close;
1021 }
1022 disk_super = (struct btrfs_super_block *)bh->b_data;
1023 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
Yan Zheng2b820322008-11-17 21:11:30 -05001024 sizeof(disk_super->magic))) {
Chris Masondfe25022008-05-13 13:46:40 -04001025 ret = -ENOENT;
1026 goto error_brelse;
1027 }
1028 devid = le64_to_cpu(disk_super->dev_item.devid);
Yan Zheng2b820322008-11-17 21:11:30 -05001029 dev_uuid = disk_super->dev_item.uuid;
1030 device = btrfs_find_device(root, devid, dev_uuid,
1031 disk_super->fsid);
Chris Masondfe25022008-05-13 13:46:40 -04001032 if (!device) {
1033 ret = -ENOENT;
1034 goto error_brelse;
1035 }
Chris Masondfe25022008-05-13 13:46:40 -04001036 }
Yan Zheng2b820322008-11-17 21:11:30 -05001037
1038 if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
1039 printk("btrfs: unable to remove the only writeable device\n");
1040 ret = -EINVAL;
1041 goto error_brelse;
1042 }
1043
1044 if (device->writeable) {
1045 list_del_init(&device->dev_alloc_list);
1046 root->fs_info->fs_devices->rw_devices--;
1047 }
Chris Masona061fc82008-05-07 11:43:44 -04001048
1049 ret = btrfs_shrink_device(device, 0);
1050 if (ret)
1051 goto error_brelse;
1052
Chris Masona061fc82008-05-07 11:43:44 -04001053 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1054 if (ret)
1055 goto error_brelse;
1056
Yan Zheng2b820322008-11-17 21:11:30 -05001057 device->in_fs_metadata = 0;
1058 if (device->fs_devices == root->fs_info->fs_devices) {
1059 list_del_init(&device->dev_list);
1060 root->fs_info->fs_devices->num_devices--;
1061 if (device->bdev)
1062 device->fs_devices->open_devices--;
1063 }
1064
1065 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1066 struct btrfs_device, dev_list);
1067 if (device->bdev == root->fs_info->sb->s_bdev)
1068 root->fs_info->sb->s_bdev = next_device->bdev;
1069 if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1070 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1071
1072 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
1073 btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);
1074
1075 if (device->fs_devices != root->fs_info->fs_devices) {
1076 BUG_ON(device->writeable);
1077 brelse(bh);
1078 if (bdev)
Chris Mason15916de2008-11-19 21:17:22 -05001079 close_bdev_exclusive(bdev, MS_RDONLY);
Yan Zheng2b820322008-11-17 21:11:30 -05001080
1081 if (device->bdev) {
Chris Mason15916de2008-11-19 21:17:22 -05001082 close_bdev_exclusive(device->bdev, device->mode);
Yan Zheng2b820322008-11-17 21:11:30 -05001083 device->bdev = NULL;
1084 device->fs_devices->open_devices--;
1085 }
1086 if (device->fs_devices->open_devices == 0) {
1087 struct btrfs_fs_devices *fs_devices;
1088 fs_devices = root->fs_info->fs_devices;
1089 while (fs_devices) {
1090 if (fs_devices->seed == device->fs_devices)
1091 break;
1092 fs_devices = fs_devices->seed;
1093 }
1094 fs_devices->seed = device->fs_devices->seed;
1095 device->fs_devices->seed = NULL;
1096 __btrfs_close_devices(device->fs_devices);
1097 }
1098 ret = 0;
1099 goto out;
1100 }
1101
1102 /*
1103 * at this point, the device is zero sized. We want to
1104 * remove it from the devices list and zero out the old super
1105 */
1106 if (device->writeable) {
Chris Masondfe25022008-05-13 13:46:40 -04001107 /* make sure this device isn't detected as part of
1108 * the FS anymore
1109 */
1110 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
1111 set_buffer_dirty(bh);
1112 sync_dirty_buffer(bh);
Chris Masondfe25022008-05-13 13:46:40 -04001113 }
Yan Zheng2b820322008-11-17 21:11:30 -05001114 brelse(bh);
Chris Masona061fc82008-05-07 11:43:44 -04001115
Chris Masondfe25022008-05-13 13:46:40 -04001116 if (device->bdev) {
1117 /* one close for the device struct or super_block */
Chris Mason15916de2008-11-19 21:17:22 -05001118 close_bdev_exclusive(device->bdev, device->mode);
Chris Masondfe25022008-05-13 13:46:40 -04001119 }
1120 if (bdev) {
1121 /* one close for us */
Chris Mason15916de2008-11-19 21:17:22 -05001122 close_bdev_exclusive(bdev, MS_RDONLY);
Chris Masondfe25022008-05-13 13:46:40 -04001123 }
Chris Masona061fc82008-05-07 11:43:44 -04001124 kfree(device->name);
1125 kfree(device);
1126 ret = 0;
1127 goto out;
1128
1129error_brelse:
1130 brelse(bh);
1131error_close:
Chris Masondfe25022008-05-13 13:46:40 -04001132 if (bdev)
Chris Mason15916de2008-11-19 21:17:22 -05001133 close_bdev_exclusive(bdev, MS_RDONLY);
Chris Masona061fc82008-05-07 11:43:44 -04001134out:
Chris Mason7d9eb122008-07-08 14:19:17 -04001135 mutex_unlock(&root->fs_info->volume_mutex);
Chris Masona061fc82008-05-07 11:43:44 -04001136 mutex_unlock(&uuid_mutex);
Chris Masona061fc82008-05-07 11:43:44 -04001137 return ret;
1138}
1139
Yan Zheng2b820322008-11-17 21:11:30 -05001140/*
1141 * does all the dirty work required for changing file system's UUID.
1142 */
1143static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
1144 struct btrfs_root *root)
1145{
1146 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
1147 struct btrfs_fs_devices *old_devices;
1148 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
1149 struct btrfs_device *device;
1150 u64 super_flags;
1151
1152 BUG_ON(!mutex_is_locked(&uuid_mutex));
1153 if (!fs_devices->seeding || fs_devices->opened != 1)
1154 return -EINVAL;
1155
1156 old_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
1157 if (!old_devices)
1158 return -ENOMEM;
1159
1160 memcpy(old_devices, fs_devices, sizeof(*old_devices));
1161 old_devices->opened = 1;
1162 old_devices->sprouted = 1;
1163 INIT_LIST_HEAD(&old_devices->devices);
1164 INIT_LIST_HEAD(&old_devices->alloc_list);
1165 list_splice_init(&fs_devices->devices, &old_devices->devices);
1166 list_splice_init(&fs_devices->alloc_list, &old_devices->alloc_list);
1167 list_for_each_entry(device, &old_devices->devices, dev_list) {
1168 device->fs_devices = old_devices;
1169 }
1170 list_add(&old_devices->list, &fs_uuids);
1171
1172 fs_devices->seeding = 0;
1173 fs_devices->num_devices = 0;
1174 fs_devices->open_devices = 0;
1175 fs_devices->seed = old_devices;
1176
1177 generate_random_uuid(fs_devices->fsid);
1178 memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1179 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1180 super_flags = btrfs_super_flags(disk_super) &
1181 ~BTRFS_SUPER_FLAG_SEEDING;
1182 btrfs_set_super_flags(disk_super, super_flags);
1183
1184 return 0;
1185}
1186
1187/*
1188 * strore the expected generation for seed devices in device items.
1189 */
1190static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
1191 struct btrfs_root *root)
1192{
1193 struct btrfs_path *path;
1194 struct extent_buffer *leaf;
1195 struct btrfs_dev_item *dev_item;
1196 struct btrfs_device *device;
1197 struct btrfs_key key;
1198 u8 fs_uuid[BTRFS_UUID_SIZE];
1199 u8 dev_uuid[BTRFS_UUID_SIZE];
1200 u64 devid;
1201 int ret;
1202
1203 path = btrfs_alloc_path();
1204 if (!path)
1205 return -ENOMEM;
1206
1207 root = root->fs_info->chunk_root;
1208 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1209 key.offset = 0;
1210 key.type = BTRFS_DEV_ITEM_KEY;
1211
1212 while (1) {
1213 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1214 if (ret < 0)
1215 goto error;
1216
1217 leaf = path->nodes[0];
1218next_slot:
1219 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1220 ret = btrfs_next_leaf(root, path);
1221 if (ret > 0)
1222 break;
1223 if (ret < 0)
1224 goto error;
1225 leaf = path->nodes[0];
1226 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1227 btrfs_release_path(root, path);
1228 continue;
1229 }
1230
1231 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1232 if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
1233 key.type != BTRFS_DEV_ITEM_KEY)
1234 break;
1235
1236 dev_item = btrfs_item_ptr(leaf, path->slots[0],
1237 struct btrfs_dev_item);
1238 devid = btrfs_device_id(leaf, dev_item);
1239 read_extent_buffer(leaf, dev_uuid,
1240 (unsigned long)btrfs_device_uuid(dev_item),
1241 BTRFS_UUID_SIZE);
1242 read_extent_buffer(leaf, fs_uuid,
1243 (unsigned long)btrfs_device_fsid(dev_item),
1244 BTRFS_UUID_SIZE);
1245 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
1246 BUG_ON(!device);
1247
1248 if (device->fs_devices->seeding) {
1249 btrfs_set_device_generation(leaf, dev_item,
1250 device->generation);
1251 btrfs_mark_buffer_dirty(leaf);
1252 }
1253
1254 path->slots[0]++;
1255 goto next_slot;
1256 }
1257 ret = 0;
1258error:
1259 btrfs_free_path(path);
1260 return ret;
1261}
1262
Chris Mason788f20e2008-04-28 15:29:42 -04001263int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1264{
1265 struct btrfs_trans_handle *trans;
1266 struct btrfs_device *device;
1267 struct block_device *bdev;
1268 struct list_head *cur;
1269 struct list_head *devices;
Yan Zheng2b820322008-11-17 21:11:30 -05001270 struct super_block *sb = root->fs_info->sb;
Chris Mason788f20e2008-04-28 15:29:42 -04001271 u64 total_bytes;
Yan Zheng2b820322008-11-17 21:11:30 -05001272 int seeding_dev = 0;
Chris Mason788f20e2008-04-28 15:29:42 -04001273 int ret = 0;
1274
Yan Zheng2b820322008-11-17 21:11:30 -05001275 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1276 return -EINVAL;
Chris Mason788f20e2008-04-28 15:29:42 -04001277
Chris Mason15916de2008-11-19 21:17:22 -05001278 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
Chris Mason788f20e2008-04-28 15:29:42 -04001279 if (!bdev) {
1280 return -EIO;
1281 }
Chris Masona2135012008-06-25 16:01:30 -04001282
Yan Zheng2b820322008-11-17 21:11:30 -05001283 if (root->fs_info->fs_devices->seeding) {
1284 seeding_dev = 1;
1285 down_write(&sb->s_umount);
1286 mutex_lock(&uuid_mutex);
1287 }
1288
Chris Mason8c8bee1d2008-09-29 11:19:10 -04001289 filemap_write_and_wait(bdev->bd_inode->i_mapping);
Chris Mason7d9eb122008-07-08 14:19:17 -04001290 mutex_lock(&root->fs_info->volume_mutex);
Chris Masona2135012008-06-25 16:01:30 -04001291
Chris Mason788f20e2008-04-28 15:29:42 -04001292 devices = &root->fs_info->fs_devices->devices;
1293 list_for_each(cur, devices) {
1294 device = list_entry(cur, struct btrfs_device, dev_list);
1295 if (device->bdev == bdev) {
1296 ret = -EEXIST;
Yan Zheng2b820322008-11-17 21:11:30 -05001297 goto error;
Chris Mason788f20e2008-04-28 15:29:42 -04001298 }
1299 }
1300
1301 device = kzalloc(sizeof(*device), GFP_NOFS);
1302 if (!device) {
1303 /* we can safely leave the fs_devices entry around */
1304 ret = -ENOMEM;
Yan Zheng2b820322008-11-17 21:11:30 -05001305 goto error;
Chris Mason788f20e2008-04-28 15:29:42 -04001306 }
1307
Chris Mason788f20e2008-04-28 15:29:42 -04001308 device->name = kstrdup(device_path, GFP_NOFS);
1309 if (!device->name) {
1310 kfree(device);
Yan Zheng2b820322008-11-17 21:11:30 -05001311 ret = -ENOMEM;
1312 goto error;
Chris Mason788f20e2008-04-28 15:29:42 -04001313 }
Yan Zheng2b820322008-11-17 21:11:30 -05001314
1315 ret = find_next_devid(root, &device->devid);
1316 if (ret) {
1317 kfree(device);
1318 goto error;
1319 }
1320
1321 trans = btrfs_start_transaction(root, 1);
1322 lock_chunks(root);
1323
1324 device->barriers = 1;
1325 device->writeable = 1;
1326 device->work.func = pending_bios_fn;
1327 generate_random_uuid(device->uuid);
1328 spin_lock_init(&device->io_lock);
1329 device->generation = trans->transid;
Chris Mason788f20e2008-04-28 15:29:42 -04001330 device->io_width = root->sectorsize;
1331 device->io_align = root->sectorsize;
1332 device->sector_size = root->sectorsize;
1333 device->total_bytes = i_size_read(bdev->bd_inode);
1334 device->dev_root = root->fs_info->dev_root;
1335 device->bdev = bdev;
Chris Masondfe25022008-05-13 13:46:40 -04001336 device->in_fs_metadata = 1;
Chris Mason15916de2008-11-19 21:17:22 -05001337 device->mode = 0;
Zheng Yan325cd4b2008-09-05 16:43:54 -04001338 set_blocksize(device->bdev, 4096);
1339
Yan Zheng2b820322008-11-17 21:11:30 -05001340 if (seeding_dev) {
1341 sb->s_flags &= ~MS_RDONLY;
1342 ret = btrfs_prepare_sprout(trans, root);
1343 BUG_ON(ret);
1344 }
1345
1346 device->fs_devices = root->fs_info->fs_devices;
1347 list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
1348 list_add(&device->dev_alloc_list,
1349 &root->fs_info->fs_devices->alloc_list);
1350 root->fs_info->fs_devices->num_devices++;
1351 root->fs_info->fs_devices->open_devices++;
1352 root->fs_info->fs_devices->rw_devices++;
1353 root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
1354
Chris Mason788f20e2008-04-28 15:29:42 -04001355 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
1356 btrfs_set_super_total_bytes(&root->fs_info->super_copy,
1357 total_bytes + device->total_bytes);
1358
1359 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
1360 btrfs_set_super_num_devices(&root->fs_info->super_copy,
1361 total_bytes + 1);
1362
Yan Zheng2b820322008-11-17 21:11:30 -05001363 if (seeding_dev) {
1364 ret = init_first_rw_device(trans, root, device);
1365 BUG_ON(ret);
1366 ret = btrfs_finish_sprout(trans, root);
1367 BUG_ON(ret);
1368 } else {
1369 ret = btrfs_add_device(trans, root, device);
1370 }
1371
Chris Mason7d9eb122008-07-08 14:19:17 -04001372 unlock_chunks(root);
Yan Zheng2b820322008-11-17 21:11:30 -05001373 btrfs_commit_transaction(trans, root);
1374
1375 if (seeding_dev) {
1376 mutex_unlock(&uuid_mutex);
1377 up_write(&sb->s_umount);
1378
1379 ret = btrfs_relocate_sys_chunks(root);
1380 BUG_ON(ret);
1381 }
1382out:
Chris Mason7d9eb122008-07-08 14:19:17 -04001383 mutex_unlock(&root->fs_info->volume_mutex);
Chris Mason788f20e2008-04-28 15:29:42 -04001384 return ret;
Yan Zheng2b820322008-11-17 21:11:30 -05001385error:
Chris Mason15916de2008-11-19 21:17:22 -05001386 close_bdev_exclusive(bdev, 0);
Yan Zheng2b820322008-11-17 21:11:30 -05001387 if (seeding_dev) {
1388 mutex_unlock(&uuid_mutex);
1389 up_write(&sb->s_umount);
1390 }
Chris Mason788f20e2008-04-28 15:29:42 -04001391 goto out;
1392}
1393
Chris Masona1b32a52008-09-05 16:09:51 -04001394int noinline btrfs_update_device(struct btrfs_trans_handle *trans,
1395 struct btrfs_device *device)
Chris Mason0b86a832008-03-24 15:01:56 -04001396{
1397 int ret;
1398 struct btrfs_path *path;
1399 struct btrfs_root *root;
1400 struct btrfs_dev_item *dev_item;
1401 struct extent_buffer *leaf;
1402 struct btrfs_key key;
1403
1404 root = device->dev_root->fs_info->chunk_root;
1405
1406 path = btrfs_alloc_path();
1407 if (!path)
1408 return -ENOMEM;
1409
1410 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1411 key.type = BTRFS_DEV_ITEM_KEY;
1412 key.offset = device->devid;
1413
1414 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1415 if (ret < 0)
1416 goto out;
1417
1418 if (ret > 0) {
1419 ret = -ENOENT;
1420 goto out;
1421 }
1422
1423 leaf = path->nodes[0];
1424 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1425
1426 btrfs_set_device_id(leaf, dev_item, device->devid);
1427 btrfs_set_device_type(leaf, dev_item, device->type);
1428 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1429 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1430 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
Chris Mason0b86a832008-03-24 15:01:56 -04001431 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
1432 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1433 btrfs_mark_buffer_dirty(leaf);
1434
1435out:
1436 btrfs_free_path(path);
1437 return ret;
1438}
1439
Chris Mason7d9eb122008-07-08 14:19:17 -04001440static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
Chris Mason8f18cf12008-04-25 16:53:30 -04001441 struct btrfs_device *device, u64 new_size)
1442{
1443 struct btrfs_super_block *super_copy =
1444 &device->dev_root->fs_info->super_copy;
1445 u64 old_total = btrfs_super_total_bytes(super_copy);
1446 u64 diff = new_size - device->total_bytes;
1447
Yan Zheng2b820322008-11-17 21:11:30 -05001448 if (!device->writeable)
1449 return -EACCES;
1450 if (new_size <= device->total_bytes)
1451 return -EINVAL;
1452
Chris Mason8f18cf12008-04-25 16:53:30 -04001453 btrfs_set_super_total_bytes(super_copy, old_total + diff);
Yan Zheng2b820322008-11-17 21:11:30 -05001454 device->fs_devices->total_rw_bytes += diff;
1455
1456 device->total_bytes = new_size;
Chris Mason8f18cf12008-04-25 16:53:30 -04001457 return btrfs_update_device(trans, device);
1458}
1459
Chris Mason7d9eb122008-07-08 14:19:17 -04001460int btrfs_grow_device(struct btrfs_trans_handle *trans,
1461 struct btrfs_device *device, u64 new_size)
1462{
1463 int ret;
1464 lock_chunks(device->dev_root);
1465 ret = __btrfs_grow_device(trans, device, new_size);
1466 unlock_chunks(device->dev_root);
1467 return ret;
1468}
1469
Chris Mason8f18cf12008-04-25 16:53:30 -04001470static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1471 struct btrfs_root *root,
1472 u64 chunk_tree, u64 chunk_objectid,
1473 u64 chunk_offset)
1474{
1475 int ret;
1476 struct btrfs_path *path;
1477 struct btrfs_key key;
1478
1479 root = root->fs_info->chunk_root;
1480 path = btrfs_alloc_path();
1481 if (!path)
1482 return -ENOMEM;
1483
1484 key.objectid = chunk_objectid;
1485 key.offset = chunk_offset;
1486 key.type = BTRFS_CHUNK_ITEM_KEY;
1487
1488 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1489 BUG_ON(ret);
1490
1491 ret = btrfs_del_item(trans, root, path);
1492 BUG_ON(ret);
1493
1494 btrfs_free_path(path);
1495 return 0;
1496}
1497
1498int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
1499 chunk_offset)
1500{
1501 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1502 struct btrfs_disk_key *disk_key;
1503 struct btrfs_chunk *chunk;
1504 u8 *ptr;
1505 int ret = 0;
1506 u32 num_stripes;
1507 u32 array_size;
1508 u32 len = 0;
1509 u32 cur;
1510 struct btrfs_key key;
1511
1512 array_size = btrfs_super_sys_array_size(super_copy);
1513
1514 ptr = super_copy->sys_chunk_array;
1515 cur = 0;
1516
1517 while (cur < array_size) {
1518 disk_key = (struct btrfs_disk_key *)ptr;
1519 btrfs_disk_key_to_cpu(&key, disk_key);
1520
1521 len = sizeof(*disk_key);
1522
1523 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1524 chunk = (struct btrfs_chunk *)(ptr + len);
1525 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1526 len += btrfs_chunk_item_size(num_stripes);
1527 } else {
1528 ret = -EIO;
1529 break;
1530 }
1531 if (key.objectid == chunk_objectid &&
1532 key.offset == chunk_offset) {
1533 memmove(ptr, ptr + len, array_size - (cur + len));
1534 array_size -= len;
1535 btrfs_set_super_sys_array_size(super_copy, array_size);
1536 } else {
1537 ptr += len;
1538 cur += len;
1539 }
1540 }
1541 return ret;
1542}
1543
Chris Mason8f18cf12008-04-25 16:53:30 -04001544int btrfs_relocate_chunk(struct btrfs_root *root,
1545 u64 chunk_tree, u64 chunk_objectid,
1546 u64 chunk_offset)
1547{
1548 struct extent_map_tree *em_tree;
1549 struct btrfs_root *extent_root;
1550 struct btrfs_trans_handle *trans;
1551 struct extent_map *em;
1552 struct map_lookup *map;
1553 int ret;
1554 int i;
1555
Chris Mason323da792008-05-09 11:46:48 -04001556 printk("btrfs relocating chunk %llu\n",
1557 (unsigned long long)chunk_offset);
Chris Mason8f18cf12008-04-25 16:53:30 -04001558 root = root->fs_info->chunk_root;
1559 extent_root = root->fs_info->extent_root;
1560 em_tree = &root->fs_info->mapping_tree.map_tree;
1561
1562 /* step one, relocate all the extents inside this chunk */
Zheng Yan1a40e232008-09-26 10:09:34 -04001563 ret = btrfs_relocate_block_group(extent_root, chunk_offset);
Chris Mason8f18cf12008-04-25 16:53:30 -04001564 BUG_ON(ret);
1565
1566 trans = btrfs_start_transaction(root, 1);
1567 BUG_ON(!trans);
1568
Chris Mason7d9eb122008-07-08 14:19:17 -04001569 lock_chunks(root);
1570
Chris Mason8f18cf12008-04-25 16:53:30 -04001571 /*
1572 * step two, delete the device extents and the
1573 * chunk tree entries
1574 */
1575 spin_lock(&em_tree->lock);
1576 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1577 spin_unlock(&em_tree->lock);
1578
Chris Masona061fc82008-05-07 11:43:44 -04001579 BUG_ON(em->start > chunk_offset ||
1580 em->start + em->len < chunk_offset);
Chris Mason8f18cf12008-04-25 16:53:30 -04001581 map = (struct map_lookup *)em->bdev;
1582
1583 for (i = 0; i < map->num_stripes; i++) {
1584 ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
1585 map->stripes[i].physical);
1586 BUG_ON(ret);
Chris Masona061fc82008-05-07 11:43:44 -04001587
Chris Masondfe25022008-05-13 13:46:40 -04001588 if (map->stripes[i].dev) {
1589 ret = btrfs_update_device(trans, map->stripes[i].dev);
1590 BUG_ON(ret);
1591 }
Chris Mason8f18cf12008-04-25 16:53:30 -04001592 }
1593 ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
1594 chunk_offset);
1595
1596 BUG_ON(ret);
1597
1598 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
1599 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
1600 BUG_ON(ret);
Chris Mason8f18cf12008-04-25 16:53:30 -04001601 }
1602
Zheng Yan1a40e232008-09-26 10:09:34 -04001603 ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
1604 BUG_ON(ret);
1605
Chris Mason8f18cf12008-04-25 16:53:30 -04001606 spin_lock(&em_tree->lock);
1607 remove_extent_mapping(em_tree, em);
Zheng Yan1a40e232008-09-26 10:09:34 -04001608 spin_unlock(&em_tree->lock);
1609
Chris Mason8f18cf12008-04-25 16:53:30 -04001610 kfree(map);
1611 em->bdev = NULL;
1612
1613 /* once for the tree */
1614 free_extent_map(em);
Chris Mason8f18cf12008-04-25 16:53:30 -04001615 /* once for us */
1616 free_extent_map(em);
1617
Chris Mason7d9eb122008-07-08 14:19:17 -04001618 unlock_chunks(root);
Chris Mason8f18cf12008-04-25 16:53:30 -04001619 btrfs_end_transaction(trans, root);
1620 return 0;
1621}
1622
Yan Zheng2b820322008-11-17 21:11:30 -05001623static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1624{
1625 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
1626 struct btrfs_path *path;
1627 struct extent_buffer *leaf;
1628 struct btrfs_chunk *chunk;
1629 struct btrfs_key key;
1630 struct btrfs_key found_key;
1631 u64 chunk_tree = chunk_root->root_key.objectid;
1632 u64 chunk_type;
1633 int ret;
1634
1635 path = btrfs_alloc_path();
1636 if (!path)
1637 return -ENOMEM;
1638
1639 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1640 key.offset = (u64)-1;
1641 key.type = BTRFS_CHUNK_ITEM_KEY;
1642
1643 while (1) {
1644 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1645 if (ret < 0)
1646 goto error;
1647 BUG_ON(ret == 0);
1648
1649 ret = btrfs_previous_item(chunk_root, path, key.objectid,
1650 key.type);
1651 if (ret < 0)
1652 goto error;
1653 if (ret > 0)
1654 break;
1655
1656 leaf = path->nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1658
1659 chunk = btrfs_item_ptr(leaf, path->slots[0],
1660 struct btrfs_chunk);
1661 chunk_type = btrfs_chunk_type(leaf, chunk);
1662 btrfs_release_path(chunk_root, path);
1663
1664 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
1665 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
1666 found_key.objectid,
1667 found_key.offset);
1668 BUG_ON(ret);
1669 }
1670
1671 if (found_key.offset == 0)
1672 break;
1673 key.offset = found_key.offset - 1;
1674 }
1675 ret = 0;
1676error:
1677 btrfs_free_path(path);
1678 return ret;
1679}
1680
Chris Masonec44a352008-04-28 15:29:52 -04001681static u64 div_factor(u64 num, int factor)
1682{
1683 if (factor == 10)
1684 return num;
1685 num *= factor;
1686 do_div(num, 10);
1687 return num;
1688}
1689
Chris Masonec44a352008-04-28 15:29:52 -04001690int btrfs_balance(struct btrfs_root *dev_root)
1691{
1692 int ret;
1693 struct list_head *cur;
1694 struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
1695 struct btrfs_device *device;
1696 u64 old_size;
1697 u64 size_to_free;
1698 struct btrfs_path *path;
1699 struct btrfs_key key;
1700 struct btrfs_chunk *chunk;
1701 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
1702 struct btrfs_trans_handle *trans;
1703 struct btrfs_key found_key;
1704
Yan Zheng2b820322008-11-17 21:11:30 -05001705 if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
1706 return -EROFS;
Chris Masonec44a352008-04-28 15:29:52 -04001707
Chris Mason7d9eb122008-07-08 14:19:17 -04001708 mutex_lock(&dev_root->fs_info->volume_mutex);
Chris Masonec44a352008-04-28 15:29:52 -04001709 dev_root = dev_root->fs_info->dev_root;
1710
Chris Masonec44a352008-04-28 15:29:52 -04001711 /* step one make some room on all the devices */
1712 list_for_each(cur, devices) {
1713 device = list_entry(cur, struct btrfs_device, dev_list);
1714 old_size = device->total_bytes;
1715 size_to_free = div_factor(old_size, 1);
1716 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
Yan Zheng2b820322008-11-17 21:11:30 -05001717 if (!device->writeable ||
1718 device->total_bytes - device->bytes_used > size_to_free)
Chris Masonec44a352008-04-28 15:29:52 -04001719 continue;
1720
1721 ret = btrfs_shrink_device(device, old_size - size_to_free);
1722 BUG_ON(ret);
1723
1724 trans = btrfs_start_transaction(dev_root, 1);
1725 BUG_ON(!trans);
1726
1727 ret = btrfs_grow_device(trans, device, old_size);
1728 BUG_ON(ret);
1729
1730 btrfs_end_transaction(trans, dev_root);
1731 }
1732
1733 /* step two, relocate all the chunks */
1734 path = btrfs_alloc_path();
1735 BUG_ON(!path);
1736
1737 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1738 key.offset = (u64)-1;
1739 key.type = BTRFS_CHUNK_ITEM_KEY;
1740
1741 while(1) {
1742 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1743 if (ret < 0)
1744 goto error;
1745
1746 /*
1747 * this shouldn't happen, it means the last relocate
1748 * failed
1749 */
1750 if (ret == 0)
1751 break;
1752
1753 ret = btrfs_previous_item(chunk_root, path, 0,
1754 BTRFS_CHUNK_ITEM_KEY);
Chris Mason7d9eb122008-07-08 14:19:17 -04001755 if (ret)
Chris Masonec44a352008-04-28 15:29:52 -04001756 break;
Chris Mason7d9eb122008-07-08 14:19:17 -04001757
Chris Masonec44a352008-04-28 15:29:52 -04001758 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1759 path->slots[0]);
1760 if (found_key.objectid != key.objectid)
1761 break;
Chris Mason7d9eb122008-07-08 14:19:17 -04001762
Chris Masonec44a352008-04-28 15:29:52 -04001763 chunk = btrfs_item_ptr(path->nodes[0],
1764 path->slots[0],
1765 struct btrfs_chunk);
1766 key.offset = found_key.offset;
1767 /* chunk zero is special */
1768 if (key.offset == 0)
1769 break;
1770
Chris Mason7d9eb122008-07-08 14:19:17 -04001771 btrfs_release_path(chunk_root, path);
Chris Masonec44a352008-04-28 15:29:52 -04001772 ret = btrfs_relocate_chunk(chunk_root,
1773 chunk_root->root_key.objectid,
1774 found_key.objectid,
1775 found_key.offset);
1776 BUG_ON(ret);
Chris Masonec44a352008-04-28 15:29:52 -04001777 }
1778 ret = 0;
1779error:
1780 btrfs_free_path(path);
Chris Mason7d9eb122008-07-08 14:19:17 -04001781 mutex_unlock(&dev_root->fs_info->volume_mutex);
Chris Masonec44a352008-04-28 15:29:52 -04001782 return ret;
1783}
1784
Chris Mason8f18cf12008-04-25 16:53:30 -04001785/*
1786 * shrinking a device means finding all of the device extents past
1787 * the new size, and then following the back refs to the chunks.
1788 * The chunk relocation code actually frees the device extent
1789 */
1790int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1791{
1792 struct btrfs_trans_handle *trans;
1793 struct btrfs_root *root = device->dev_root;
1794 struct btrfs_dev_extent *dev_extent = NULL;
1795 struct btrfs_path *path;
1796 u64 length;
1797 u64 chunk_tree;
1798 u64 chunk_objectid;
1799 u64 chunk_offset;
1800 int ret;
1801 int slot;
1802 struct extent_buffer *l;
1803 struct btrfs_key key;
1804 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1805 u64 old_total = btrfs_super_total_bytes(super_copy);
1806 u64 diff = device->total_bytes - new_size;
1807
Yan Zheng2b820322008-11-17 21:11:30 -05001808 if (new_size >= device->total_bytes)
1809 return -EINVAL;
Chris Mason8f18cf12008-04-25 16:53:30 -04001810
1811 path = btrfs_alloc_path();
1812 if (!path)
1813 return -ENOMEM;
1814
1815 trans = btrfs_start_transaction(root, 1);
1816 if (!trans) {
1817 ret = -ENOMEM;
1818 goto done;
1819 }
1820
1821 path->reada = 2;
1822
Chris Mason7d9eb122008-07-08 14:19:17 -04001823 lock_chunks(root);
1824
Chris Mason8f18cf12008-04-25 16:53:30 -04001825 device->total_bytes = new_size;
Yan Zheng2b820322008-11-17 21:11:30 -05001826 if (device->writeable)
1827 device->fs_devices->total_rw_bytes -= diff;
Chris Mason8f18cf12008-04-25 16:53:30 -04001828 ret = btrfs_update_device(trans, device);
1829 if (ret) {
Chris Mason7d9eb122008-07-08 14:19:17 -04001830 unlock_chunks(root);
Chris Mason8f18cf12008-04-25 16:53:30 -04001831 btrfs_end_transaction(trans, root);
1832 goto done;
1833 }
1834 WARN_ON(diff > old_total);
1835 btrfs_set_super_total_bytes(super_copy, old_total - diff);
Chris Mason7d9eb122008-07-08 14:19:17 -04001836 unlock_chunks(root);
Chris Mason8f18cf12008-04-25 16:53:30 -04001837 btrfs_end_transaction(trans, root);
1838
1839 key.objectid = device->devid;
1840 key.offset = (u64)-1;
1841 key.type = BTRFS_DEV_EXTENT_KEY;
1842
1843 while (1) {
1844 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1845 if (ret < 0)
1846 goto done;
1847
1848 ret = btrfs_previous_item(root, path, 0, key.type);
1849 if (ret < 0)
1850 goto done;
1851 if (ret) {
1852 ret = 0;
1853 goto done;
1854 }
1855
1856 l = path->nodes[0];
1857 slot = path->slots[0];
1858 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1859
1860 if (key.objectid != device->devid)
1861 goto done;
1862
1863 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1864 length = btrfs_dev_extent_length(l, dev_extent);
1865
1866 if (key.offset + length <= new_size)
1867 goto done;
1868
1869 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1870 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1871 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1872 btrfs_release_path(root, path);
1873
1874 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
1875 chunk_offset);
1876 if (ret)
1877 goto done;
1878 }
1879
1880done:
1881 btrfs_free_path(path);
1882 return ret;
1883}
1884
Chris Mason0b86a832008-03-24 15:01:56 -04001885int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
1886 struct btrfs_root *root,
1887 struct btrfs_key *key,
1888 struct btrfs_chunk *chunk, int item_size)
1889{
1890 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1891 struct btrfs_disk_key disk_key;
1892 u32 array_size;
1893 u8 *ptr;
1894
1895 array_size = btrfs_super_sys_array_size(super_copy);
1896 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
1897 return -EFBIG;
1898
1899 ptr = super_copy->sys_chunk_array + array_size;
1900 btrfs_cpu_key_to_disk(&disk_key, key);
1901 memcpy(ptr, &disk_key, sizeof(disk_key));
1902 ptr += sizeof(disk_key);
1903 memcpy(ptr, chunk, item_size);
1904 item_size += sizeof(disk_key);
1905 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
1906 return 0;
1907}
1908
Chris Masona1b32a52008-09-05 16:09:51 -04001909static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size,
1910 int num_stripes, int sub_stripes)
Chris Mason9b3f68b2008-04-18 10:29:51 -04001911{
1912 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
1913 return calc_size;
1914 else if (type & BTRFS_BLOCK_GROUP_RAID10)
1915 return calc_size * (num_stripes / sub_stripes);
1916 else
1917 return calc_size * num_stripes;
1918}
1919
Yan Zheng2b820322008-11-17 21:11:30 -05001920static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1921 struct btrfs_root *extent_root,
1922 struct map_lookup **map_ret,
1923 u64 *num_bytes, u64 *stripe_size,
1924 u64 start, u64 type)
Chris Mason0b86a832008-03-24 15:01:56 -04001925{
Chris Mason593060d2008-03-25 16:50:33 -04001926 struct btrfs_fs_info *info = extent_root->fs_info;
Chris Mason0b86a832008-03-24 15:01:56 -04001927 struct btrfs_device *device = NULL;
Yan Zheng2b820322008-11-17 21:11:30 -05001928 struct btrfs_fs_devices *fs_devices = info->fs_devices;
Chris Mason6324fbf2008-03-24 15:01:59 -04001929 struct list_head *cur;
Yan Zheng2b820322008-11-17 21:11:30 -05001930 struct map_lookup *map = NULL;
Chris Mason0b86a832008-03-24 15:01:56 -04001931 struct extent_map_tree *em_tree;
Chris Mason0b86a832008-03-24 15:01:56 -04001932 struct extent_map *em;
Yan Zheng2b820322008-11-17 21:11:30 -05001933 struct list_head private_devs;
Chris Masona40a90a2008-04-18 11:55:51 -04001934 int min_stripe_size = 1 * 1024 * 1024;
Chris Mason0b86a832008-03-24 15:01:56 -04001935 u64 calc_size = 1024 * 1024 * 1024;
Chris Mason9b3f68b2008-04-18 10:29:51 -04001936 u64 max_chunk_size = calc_size;
1937 u64 min_free;
Chris Mason6324fbf2008-03-24 15:01:59 -04001938 u64 avail;
1939 u64 max_avail = 0;
Yan Zheng2b820322008-11-17 21:11:30 -05001940 u64 dev_offset;
Chris Mason6324fbf2008-03-24 15:01:59 -04001941 int num_stripes = 1;
Chris Masona40a90a2008-04-18 11:55:51 -04001942 int min_stripes = 1;
Chris Mason321aecc2008-04-16 10:49:51 -04001943 int sub_stripes = 0;
Chris Mason6324fbf2008-03-24 15:01:59 -04001944 int looped = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04001945 int ret;
Chris Mason6324fbf2008-03-24 15:01:59 -04001946 int index;
Chris Mason593060d2008-03-25 16:50:33 -04001947 int stripe_len = 64 * 1024;
Chris Mason0b86a832008-03-24 15:01:56 -04001948
Chris Masonec44a352008-04-28 15:29:52 -04001949 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
1950 (type & BTRFS_BLOCK_GROUP_DUP)) {
1951 WARN_ON(1);
1952 type &= ~BTRFS_BLOCK_GROUP_DUP;
1953 }
Yan Zheng2b820322008-11-17 21:11:30 -05001954 if (list_empty(&fs_devices->alloc_list))
Chris Mason6324fbf2008-03-24 15:01:59 -04001955 return -ENOSPC;
Chris Mason593060d2008-03-25 16:50:33 -04001956
Chris Masona40a90a2008-04-18 11:55:51 -04001957 if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
Yan Zheng2b820322008-11-17 21:11:30 -05001958 num_stripes = fs_devices->rw_devices;
Chris Masona40a90a2008-04-18 11:55:51 -04001959 min_stripes = 2;
1960 }
1961 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
Chris Mason611f0e02008-04-03 16:29:03 -04001962 num_stripes = 2;
Chris Masona40a90a2008-04-18 11:55:51 -04001963 min_stripes = 2;
1964 }
Chris Mason8790d502008-04-03 16:29:03 -04001965 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
Yan Zheng2b820322008-11-17 21:11:30 -05001966 num_stripes = min_t(u64, 2, fs_devices->rw_devices);
Chris Mason9b3f68b2008-04-18 10:29:51 -04001967 if (num_stripes < 2)
1968 return -ENOSPC;
Chris Masona40a90a2008-04-18 11:55:51 -04001969 min_stripes = 2;
Chris Mason8790d502008-04-03 16:29:03 -04001970 }
Chris Mason321aecc2008-04-16 10:49:51 -04001971 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
Yan Zheng2b820322008-11-17 21:11:30 -05001972 num_stripes = fs_devices->rw_devices;
Chris Mason321aecc2008-04-16 10:49:51 -04001973 if (num_stripes < 4)
1974 return -ENOSPC;
1975 num_stripes &= ~(u32)1;
1976 sub_stripes = 2;
Chris Masona40a90a2008-04-18 11:55:51 -04001977 min_stripes = 4;
Chris Mason321aecc2008-04-16 10:49:51 -04001978 }
Chris Mason9b3f68b2008-04-18 10:29:51 -04001979
1980 if (type & BTRFS_BLOCK_GROUP_DATA) {
1981 max_chunk_size = 10 * calc_size;
Chris Masona40a90a2008-04-18 11:55:51 -04001982 min_stripe_size = 64 * 1024 * 1024;
Chris Mason9b3f68b2008-04-18 10:29:51 -04001983 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
1984 max_chunk_size = 4 * calc_size;
Chris Masona40a90a2008-04-18 11:55:51 -04001985 min_stripe_size = 32 * 1024 * 1024;
1986 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
1987 calc_size = 8 * 1024 * 1024;
1988 max_chunk_size = calc_size * 2;
1989 min_stripe_size = 1 * 1024 * 1024;
Chris Mason9b3f68b2008-04-18 10:29:51 -04001990 }
1991
Yan Zheng2b820322008-11-17 21:11:30 -05001992 /* we don't want a chunk larger than 10% of writeable space */
1993 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
1994 max_chunk_size);
Chris Mason9b3f68b2008-04-18 10:29:51 -04001995
Chris Masona40a90a2008-04-18 11:55:51 -04001996again:
Yan Zheng2b820322008-11-17 21:11:30 -05001997 if (!map || map->num_stripes != num_stripes) {
1998 kfree(map);
1999 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2000 if (!map)
2001 return -ENOMEM;
2002 map->num_stripes = num_stripes;
2003 }
2004
Chris Mason9b3f68b2008-04-18 10:29:51 -04002005 if (calc_size * num_stripes > max_chunk_size) {
2006 calc_size = max_chunk_size;
2007 do_div(calc_size, num_stripes);
2008 do_div(calc_size, stripe_len);
2009 calc_size *= stripe_len;
2010 }
2011 /* we don't want tiny stripes */
Chris Masona40a90a2008-04-18 11:55:51 -04002012 calc_size = max_t(u64, min_stripe_size, calc_size);
Chris Mason9b3f68b2008-04-18 10:29:51 -04002013
Chris Mason9b3f68b2008-04-18 10:29:51 -04002014 do_div(calc_size, stripe_len);
2015 calc_size *= stripe_len;
2016
Yan Zheng2b820322008-11-17 21:11:30 -05002017 cur = fs_devices->alloc_list.next;
Chris Mason6324fbf2008-03-24 15:01:59 -04002018 index = 0;
Chris Mason611f0e02008-04-03 16:29:03 -04002019
2020 if (type & BTRFS_BLOCK_GROUP_DUP)
2021 min_free = calc_size * 2;
Chris Mason9b3f68b2008-04-18 10:29:51 -04002022 else
2023 min_free = calc_size;
Chris Mason611f0e02008-04-03 16:29:03 -04002024
Josef Bacik0f9dd462008-09-23 13:14:11 -04002025 /*
2026 * we add 1MB because we never use the first 1MB of the device, unless
2027 * we've looped, then we are likely allocating the maximum amount of
2028 * space left already
2029 */
2030 if (!looped)
2031 min_free += 1024 * 1024;
Chris Masonad5bd912008-04-21 08:28:10 -04002032
Yan Zheng2b820322008-11-17 21:11:30 -05002033 INIT_LIST_HEAD(&private_devs);
Chris Mason6324fbf2008-03-24 15:01:59 -04002034 while(index < num_stripes) {
Chris Masonb3075712008-04-22 09:22:07 -04002035 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
Yan Zheng2b820322008-11-17 21:11:30 -05002036 BUG_ON(!device->writeable);
Chris Masondfe25022008-05-13 13:46:40 -04002037 if (device->total_bytes > device->bytes_used)
2038 avail = device->total_bytes - device->bytes_used;
2039 else
2040 avail = 0;
Chris Mason6324fbf2008-03-24 15:01:59 -04002041 cur = cur->next;
Chris Mason8f18cf12008-04-25 16:53:30 -04002042
Chris Masondfe25022008-05-13 13:46:40 -04002043 if (device->in_fs_metadata && avail >= min_free) {
Yan Zheng2b820322008-11-17 21:11:30 -05002044 ret = find_free_dev_extent(trans, device,
2045 min_free, &dev_offset);
Chris Mason8f18cf12008-04-25 16:53:30 -04002046 if (ret == 0) {
2047 list_move_tail(&device->dev_alloc_list,
2048 &private_devs);
Yan Zheng2b820322008-11-17 21:11:30 -05002049 map->stripes[index].dev = device;
2050 map->stripes[index].physical = dev_offset;
Chris Mason611f0e02008-04-03 16:29:03 -04002051 index++;
Yan Zheng2b820322008-11-17 21:11:30 -05002052 if (type & BTRFS_BLOCK_GROUP_DUP) {
2053 map->stripes[index].dev = device;
2054 map->stripes[index].physical =
2055 dev_offset + calc_size;
Chris Mason8f18cf12008-04-25 16:53:30 -04002056 index++;
Yan Zheng2b820322008-11-17 21:11:30 -05002057 }
Chris Mason8f18cf12008-04-25 16:53:30 -04002058 }
Chris Masondfe25022008-05-13 13:46:40 -04002059 } else if (device->in_fs_metadata && avail > max_avail)
Chris Masona40a90a2008-04-18 11:55:51 -04002060 max_avail = avail;
Yan Zheng2b820322008-11-17 21:11:30 -05002061 if (cur == &fs_devices->alloc_list)
Chris Mason6324fbf2008-03-24 15:01:59 -04002062 break;
2063 }
Yan Zheng2b820322008-11-17 21:11:30 -05002064 list_splice(&private_devs, &fs_devices->alloc_list);
Chris Mason6324fbf2008-03-24 15:01:59 -04002065 if (index < num_stripes) {
Chris Masona40a90a2008-04-18 11:55:51 -04002066 if (index >= min_stripes) {
2067 num_stripes = index;
2068 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2069 num_stripes /= sub_stripes;
2070 num_stripes *= sub_stripes;
2071 }
2072 looped = 1;
2073 goto again;
2074 }
Chris Mason6324fbf2008-03-24 15:01:59 -04002075 if (!looped && max_avail > 0) {
2076 looped = 1;
2077 calc_size = max_avail;
2078 goto again;
2079 }
Yan Zheng2b820322008-11-17 21:11:30 -05002080 kfree(map);
Chris Mason6324fbf2008-03-24 15:01:59 -04002081 return -ENOSPC;
2082 }
Chris Mason593060d2008-03-25 16:50:33 -04002083 map->sector_size = extent_root->sectorsize;
2084 map->stripe_len = stripe_len;
2085 map->io_align = stripe_len;
2086 map->io_width = stripe_len;
2087 map->type = type;
2088 map->num_stripes = num_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04002089 map->sub_stripes = sub_stripes;
Chris Mason0b86a832008-03-24 15:01:56 -04002090
Yan Zheng2b820322008-11-17 21:11:30 -05002091 *map_ret = map;
2092 *stripe_size = calc_size;
2093 *num_bytes = chunk_bytes_by_type(type, calc_size,
2094 num_stripes, sub_stripes);
Chris Mason0b86a832008-03-24 15:01:56 -04002095
2096 em = alloc_extent_map(GFP_NOFS);
Yan Zheng2b820322008-11-17 21:11:30 -05002097 if (!em) {
2098 kfree(map);
Chris Mason0b86a832008-03-24 15:01:56 -04002099 return -ENOMEM;
Yan Zheng2b820322008-11-17 21:11:30 -05002100 }
Chris Mason0b86a832008-03-24 15:01:56 -04002101 em->bdev = (struct block_device *)map;
Yan Zheng2b820322008-11-17 21:11:30 -05002102 em->start = start;
Chris Masone17cade2008-04-15 15:41:47 -04002103 em->len = *num_bytes;
Chris Mason0b86a832008-03-24 15:01:56 -04002104 em->block_start = 0;
Chris Masonc8b97812008-10-29 14:49:59 -04002105 em->block_len = em->len;
Chris Mason0b86a832008-03-24 15:01:56 -04002106
Chris Mason0b86a832008-03-24 15:01:56 -04002107 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
2108 spin_lock(&em_tree->lock);
2109 ret = add_extent_mapping(em_tree, em);
Chris Mason0b86a832008-03-24 15:01:56 -04002110 spin_unlock(&em_tree->lock);
Chris Masonb248a412008-04-14 09:48:18 -04002111 BUG_ON(ret);
Chris Mason0b86a832008-03-24 15:01:56 -04002112 free_extent_map(em);
Yan Zheng2b820322008-11-17 21:11:30 -05002113
2114 ret = btrfs_make_block_group(trans, extent_root, 0, type,
2115 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2116 start, *num_bytes);
2117 BUG_ON(ret);
2118
2119 index = 0;
2120 while (index < map->num_stripes) {
2121 device = map->stripes[index].dev;
2122 dev_offset = map->stripes[index].physical;
2123
2124 ret = btrfs_alloc_dev_extent(trans, device,
2125 info->chunk_root->root_key.objectid,
2126 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2127 start, dev_offset, calc_size);
2128 BUG_ON(ret);
2129 index++;
2130 }
2131
2132 return 0;
2133}
2134
2135static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
2136 struct btrfs_root *extent_root,
2137 struct map_lookup *map, u64 chunk_offset,
2138 u64 chunk_size, u64 stripe_size)
2139{
2140 u64 dev_offset;
2141 struct btrfs_key key;
2142 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
2143 struct btrfs_device *device;
2144 struct btrfs_chunk *chunk;
2145 struct btrfs_stripe *stripe;
2146 size_t item_size = btrfs_chunk_item_size(map->num_stripes);
2147 int index = 0;
2148 int ret;
2149
2150 chunk = kzalloc(item_size, GFP_NOFS);
2151 if (!chunk)
2152 return -ENOMEM;
2153
2154 index = 0;
2155 while (index < map->num_stripes) {
2156 device = map->stripes[index].dev;
2157 device->bytes_used += stripe_size;
2158 ret = btrfs_update_device(trans, device);
2159 BUG_ON(ret);
2160 index++;
2161 }
2162
2163 index = 0;
2164 stripe = &chunk->stripe;
2165 while (index < map->num_stripes) {
2166 device = map->stripes[index].dev;
2167 dev_offset = map->stripes[index].physical;
2168
2169 btrfs_set_stack_stripe_devid(stripe, device->devid);
2170 btrfs_set_stack_stripe_offset(stripe, dev_offset);
2171 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
2172 stripe++;
2173 index++;
2174 }
2175
2176 btrfs_set_stack_chunk_length(chunk, chunk_size);
2177 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
2178 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
2179 btrfs_set_stack_chunk_type(chunk, map->type);
2180 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
2181 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
2182 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
2183 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
2184 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
2185
2186 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2187 key.type = BTRFS_CHUNK_ITEM_KEY;
2188 key.offset = chunk_offset;
2189
2190 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
2191 BUG_ON(ret);
2192
2193 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
2194 ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk,
2195 item_size);
2196 BUG_ON(ret);
2197 }
2198 kfree(chunk);
2199 return 0;
2200}
2201
2202/*
2203 * Chunk allocation falls into two parts. The first part does works
2204 * that make the new allocated chunk useable, but not do any operation
2205 * that modifies the chunk tree. The second part does the works that
2206 * require modifying the chunk tree. This division is important for the
2207 * bootstrap process of adding storage to a seed btrfs.
2208 */
2209int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2210 struct btrfs_root *extent_root, u64 type)
2211{
2212 u64 chunk_offset;
2213 u64 chunk_size;
2214 u64 stripe_size;
2215 struct map_lookup *map;
2216 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
2217 int ret;
2218
2219 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2220 &chunk_offset);
2221 if (ret)
2222 return ret;
2223
2224 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
2225 &stripe_size, chunk_offset, type);
2226 if (ret)
2227 return ret;
2228
2229 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
2230 chunk_size, stripe_size);
2231 BUG_ON(ret);
2232 return 0;
2233}
2234
2235static int noinline init_first_rw_device(struct btrfs_trans_handle *trans,
2236 struct btrfs_root *root,
2237 struct btrfs_device *device)
2238{
2239 u64 chunk_offset;
2240 u64 sys_chunk_offset;
2241 u64 chunk_size;
2242 u64 sys_chunk_size;
2243 u64 stripe_size;
2244 u64 sys_stripe_size;
2245 u64 alloc_profile;
2246 struct map_lookup *map;
2247 struct map_lookup *sys_map;
2248 struct btrfs_fs_info *fs_info = root->fs_info;
2249 struct btrfs_root *extent_root = fs_info->extent_root;
2250 int ret;
2251
2252 ret = find_next_chunk(fs_info->chunk_root,
2253 BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
2254 BUG_ON(ret);
2255
2256 alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
2257 (fs_info->metadata_alloc_profile &
2258 fs_info->avail_metadata_alloc_bits);
2259 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2260
2261 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
2262 &stripe_size, chunk_offset, alloc_profile);
2263 BUG_ON(ret);
2264
2265 sys_chunk_offset = chunk_offset + chunk_size;
2266
2267 alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
2268 (fs_info->system_alloc_profile &
2269 fs_info->avail_system_alloc_bits);
2270 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2271
2272 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
2273 &sys_chunk_size, &sys_stripe_size,
2274 sys_chunk_offset, alloc_profile);
2275 BUG_ON(ret);
2276
2277 ret = btrfs_add_device(trans, fs_info->chunk_root, device);
2278 BUG_ON(ret);
2279
2280 /*
2281 * Modifying chunk tree needs allocating new blocks from both
2282 * system block group and metadata block group. So we only can
2283 * do operations require modifying the chunk tree after both
2284 * block groups were created.
2285 */
2286 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
2287 chunk_size, stripe_size);
2288 BUG_ON(ret);
2289
2290 ret = __finish_chunk_alloc(trans, extent_root, sys_map,
2291 sys_chunk_offset, sys_chunk_size,
2292 sys_stripe_size);
2293 BUG_ON(ret);
2294 return 0;
2295}
2296
2297int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2298{
2299 struct extent_map *em;
2300 struct map_lookup *map;
2301 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2302 int readonly = 0;
2303 int i;
2304
2305 spin_lock(&map_tree->map_tree.lock);
2306 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2307 spin_unlock(&map_tree->map_tree.lock);
2308 if (!em)
2309 return 1;
2310
2311 map = (struct map_lookup *)em->bdev;
2312 for (i = 0; i < map->num_stripes; i++) {
2313 if (!map->stripes[i].dev->writeable) {
2314 readonly = 1;
2315 break;
2316 }
2317 }
2318 free_extent_map(em);
2319 return readonly;
Chris Mason0b86a832008-03-24 15:01:56 -04002320}
2321
2322void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
2323{
2324 extent_map_tree_init(&tree->map_tree, GFP_NOFS);
2325}
2326
2327void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
2328{
2329 struct extent_map *em;
2330
2331 while(1) {
2332 spin_lock(&tree->map_tree.lock);
2333 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
2334 if (em)
2335 remove_extent_mapping(&tree->map_tree, em);
2336 spin_unlock(&tree->map_tree.lock);
2337 if (!em)
2338 break;
2339 kfree(em->bdev);
2340 /* once for us */
2341 free_extent_map(em);
2342 /* once for the tree */
2343 free_extent_map(em);
2344 }
2345}
2346
Chris Masonf1885912008-04-09 16:28:12 -04002347int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
2348{
2349 struct extent_map *em;
2350 struct map_lookup *map;
2351 struct extent_map_tree *em_tree = &map_tree->map_tree;
2352 int ret;
2353
2354 spin_lock(&em_tree->lock);
2355 em = lookup_extent_mapping(em_tree, logical, len);
Chris Masonb248a412008-04-14 09:48:18 -04002356 spin_unlock(&em_tree->lock);
Chris Masonf1885912008-04-09 16:28:12 -04002357 BUG_ON(!em);
2358
2359 BUG_ON(em->start > logical || em->start + em->len < logical);
2360 map = (struct map_lookup *)em->bdev;
2361 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
2362 ret = map->num_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04002363 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
2364 ret = map->sub_stripes;
Chris Masonf1885912008-04-09 16:28:12 -04002365 else
2366 ret = 1;
2367 free_extent_map(em);
Chris Masonf1885912008-04-09 16:28:12 -04002368 return ret;
2369}
2370
Chris Masondfe25022008-05-13 13:46:40 -04002371static int find_live_mirror(struct map_lookup *map, int first, int num,
2372 int optimal)
2373{
2374 int i;
2375 if (map->stripes[optimal].dev->bdev)
2376 return optimal;
2377 for (i = first; i < first + num; i++) {
2378 if (map->stripes[i].dev->bdev)
2379 return i;
2380 }
2381 /* we couldn't find one that doesn't fail. Just return something
2382 * and the io error handling code will clean up eventually
2383 */
2384 return optimal;
2385}
2386
Chris Masonf2d8d742008-04-21 10:03:05 -04002387static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2388 u64 logical, u64 *length,
2389 struct btrfs_multi_bio **multi_ret,
2390 int mirror_num, struct page *unplug_page)
Chris Mason0b86a832008-03-24 15:01:56 -04002391{
2392 struct extent_map *em;
2393 struct map_lookup *map;
2394 struct extent_map_tree *em_tree = &map_tree->map_tree;
2395 u64 offset;
Chris Mason593060d2008-03-25 16:50:33 -04002396 u64 stripe_offset;
2397 u64 stripe_nr;
Chris Masoncea9e442008-04-09 16:28:12 -04002398 int stripes_allocated = 8;
Chris Mason321aecc2008-04-16 10:49:51 -04002399 int stripes_required = 1;
Chris Mason593060d2008-03-25 16:50:33 -04002400 int stripe_index;
Chris Masoncea9e442008-04-09 16:28:12 -04002401 int i;
Chris Masonf2d8d742008-04-21 10:03:05 -04002402 int num_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04002403 int max_errors = 0;
Chris Masoncea9e442008-04-09 16:28:12 -04002404 struct btrfs_multi_bio *multi = NULL;
Chris Mason0b86a832008-03-24 15:01:56 -04002405
Chris Masoncea9e442008-04-09 16:28:12 -04002406 if (multi_ret && !(rw & (1 << BIO_RW))) {
2407 stripes_allocated = 1;
2408 }
2409again:
2410 if (multi_ret) {
2411 multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
2412 GFP_NOFS);
2413 if (!multi)
2414 return -ENOMEM;
Chris Masona236aed2008-04-29 09:38:00 -04002415
2416 atomic_set(&multi->error, 0);
Chris Masoncea9e442008-04-09 16:28:12 -04002417 }
Chris Mason0b86a832008-03-24 15:01:56 -04002418
2419 spin_lock(&em_tree->lock);
2420 em = lookup_extent_mapping(em_tree, logical, *length);
Chris Masonb248a412008-04-14 09:48:18 -04002421 spin_unlock(&em_tree->lock);
Chris Masonf2d8d742008-04-21 10:03:05 -04002422
2423 if (!em && unplug_page)
2424 return 0;
2425
Chris Mason3b951512008-04-17 11:29:12 -04002426 if (!em) {
Chris Masona061fc82008-05-07 11:43:44 -04002427 printk("unable to find logical %Lu len %Lu\n", logical, *length);
Chris Masonf2d8d742008-04-21 10:03:05 -04002428 BUG();
Chris Mason3b951512008-04-17 11:29:12 -04002429 }
Chris Mason0b86a832008-03-24 15:01:56 -04002430
2431 BUG_ON(em->start > logical || em->start + em->len < logical);
2432 map = (struct map_lookup *)em->bdev;
2433 offset = logical - em->start;
Chris Mason593060d2008-03-25 16:50:33 -04002434
Chris Masonf1885912008-04-09 16:28:12 -04002435 if (mirror_num > map->num_stripes)
2436 mirror_num = 0;
2437
Chris Masoncea9e442008-04-09 16:28:12 -04002438 /* if our multi bio struct is too small, back off and try again */
Chris Mason321aecc2008-04-16 10:49:51 -04002439 if (rw & (1 << BIO_RW)) {
2440 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
2441 BTRFS_BLOCK_GROUP_DUP)) {
2442 stripes_required = map->num_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04002443 max_errors = 1;
Chris Mason321aecc2008-04-16 10:49:51 -04002444 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2445 stripes_required = map->sub_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04002446 max_errors = 1;
Chris Mason321aecc2008-04-16 10:49:51 -04002447 }
2448 }
2449 if (multi_ret && rw == WRITE &&
2450 stripes_allocated < stripes_required) {
Chris Masoncea9e442008-04-09 16:28:12 -04002451 stripes_allocated = map->num_stripes;
Chris Masoncea9e442008-04-09 16:28:12 -04002452 free_extent_map(em);
2453 kfree(multi);
2454 goto again;
2455 }
Chris Mason593060d2008-03-25 16:50:33 -04002456 stripe_nr = offset;
2457 /*
2458 * stripe_nr counts the total number of stripes we have to stride
2459 * to get to this block
2460 */
2461 do_div(stripe_nr, map->stripe_len);
2462
2463 stripe_offset = stripe_nr * map->stripe_len;
2464 BUG_ON(offset < stripe_offset);
2465
2466 /* stripe_offset is the offset of this block in its stripe*/
2467 stripe_offset = offset - stripe_offset;
2468
Chris Masoncea9e442008-04-09 16:28:12 -04002469 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
Chris Mason321aecc2008-04-16 10:49:51 -04002470 BTRFS_BLOCK_GROUP_RAID10 |
Chris Masoncea9e442008-04-09 16:28:12 -04002471 BTRFS_BLOCK_GROUP_DUP)) {
2472 /* we limit the length of each bio to what fits in a stripe */
2473 *length = min_t(u64, em->len - offset,
2474 map->stripe_len - stripe_offset);
2475 } else {
2476 *length = em->len - offset;
2477 }
Chris Masonf2d8d742008-04-21 10:03:05 -04002478
2479 if (!multi_ret && !unplug_page)
Chris Masoncea9e442008-04-09 16:28:12 -04002480 goto out;
2481
Chris Masonf2d8d742008-04-21 10:03:05 -04002482 num_stripes = 1;
Chris Masoncea9e442008-04-09 16:28:12 -04002483 stripe_index = 0;
Chris Mason8790d502008-04-03 16:29:03 -04002484 if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
Chris Masonf2d8d742008-04-21 10:03:05 -04002485 if (unplug_page || (rw & (1 << BIO_RW)))
2486 num_stripes = map->num_stripes;
Chris Mason2fff7342008-04-29 14:12:09 -04002487 else if (mirror_num)
Chris Masonf1885912008-04-09 16:28:12 -04002488 stripe_index = mirror_num - 1;
Chris Masondfe25022008-05-13 13:46:40 -04002489 else {
2490 stripe_index = find_live_mirror(map, 0,
2491 map->num_stripes,
2492 current->pid % map->num_stripes);
2493 }
Chris Mason2fff7342008-04-29 14:12:09 -04002494
Chris Mason611f0e02008-04-03 16:29:03 -04002495 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
Chris Masoncea9e442008-04-09 16:28:12 -04002496 if (rw & (1 << BIO_RW))
Chris Masonf2d8d742008-04-21 10:03:05 -04002497 num_stripes = map->num_stripes;
Chris Masonf1885912008-04-09 16:28:12 -04002498 else if (mirror_num)
2499 stripe_index = mirror_num - 1;
Chris Mason2fff7342008-04-29 14:12:09 -04002500
Chris Mason321aecc2008-04-16 10:49:51 -04002501 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2502 int factor = map->num_stripes / map->sub_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04002503
2504 stripe_index = do_div(stripe_nr, factor);
2505 stripe_index *= map->sub_stripes;
2506
Chris Masonf2d8d742008-04-21 10:03:05 -04002507 if (unplug_page || (rw & (1 << BIO_RW)))
2508 num_stripes = map->sub_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04002509 else if (mirror_num)
2510 stripe_index += mirror_num - 1;
Chris Masondfe25022008-05-13 13:46:40 -04002511 else {
2512 stripe_index = find_live_mirror(map, stripe_index,
2513 map->sub_stripes, stripe_index +
2514 current->pid % map->sub_stripes);
2515 }
Chris Mason8790d502008-04-03 16:29:03 -04002516 } else {
2517 /*
2518 * after this do_div call, stripe_nr is the number of stripes
2519 * on this device we have to walk to find the data, and
2520 * stripe_index is the number of our device in the stripe array
2521 */
2522 stripe_index = do_div(stripe_nr, map->num_stripes);
2523 }
Chris Mason593060d2008-03-25 16:50:33 -04002524 BUG_ON(stripe_index >= map->num_stripes);
Chris Mason593060d2008-03-25 16:50:33 -04002525
Chris Masonf2d8d742008-04-21 10:03:05 -04002526 for (i = 0; i < num_stripes; i++) {
2527 if (unplug_page) {
2528 struct btrfs_device *device;
2529 struct backing_dev_info *bdi;
2530
2531 device = map->stripes[stripe_index].dev;
Chris Masondfe25022008-05-13 13:46:40 -04002532 if (device->bdev) {
2533 bdi = blk_get_backing_dev_info(device->bdev);
2534 if (bdi->unplug_io_fn) {
2535 bdi->unplug_io_fn(bdi, unplug_page);
2536 }
Chris Masonf2d8d742008-04-21 10:03:05 -04002537 }
2538 } else {
2539 multi->stripes[i].physical =
2540 map->stripes[stripe_index].physical +
2541 stripe_offset + stripe_nr * map->stripe_len;
2542 multi->stripes[i].dev = map->stripes[stripe_index].dev;
2543 }
Chris Masoncea9e442008-04-09 16:28:12 -04002544 stripe_index++;
Chris Mason593060d2008-03-25 16:50:33 -04002545 }
Chris Masonf2d8d742008-04-21 10:03:05 -04002546 if (multi_ret) {
2547 *multi_ret = multi;
2548 multi->num_stripes = num_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04002549 multi->max_errors = max_errors;
Chris Masonf2d8d742008-04-21 10:03:05 -04002550 }
Chris Masoncea9e442008-04-09 16:28:12 -04002551out:
Chris Mason0b86a832008-03-24 15:01:56 -04002552 free_extent_map(em);
Chris Mason0b86a832008-03-24 15:01:56 -04002553 return 0;
2554}
2555
Chris Masonf2d8d742008-04-21 10:03:05 -04002556int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2557 u64 logical, u64 *length,
2558 struct btrfs_multi_bio **multi_ret, int mirror_num)
2559{
2560 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
2561 mirror_num, NULL);
2562}
2563
2564int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
2565 u64 logical, struct page *page)
2566{
2567 u64 length = PAGE_CACHE_SIZE;
2568 return __btrfs_map_block(map_tree, READ, logical, &length,
2569 NULL, 0, page);
2570}
2571
2572
Chris Mason8790d502008-04-03 16:29:03 -04002573static void end_bio_multi_stripe(struct bio *bio, int err)
Chris Mason8790d502008-04-03 16:29:03 -04002574{
Chris Masoncea9e442008-04-09 16:28:12 -04002575 struct btrfs_multi_bio *multi = bio->bi_private;
Chris Mason7d2b4da2008-08-05 10:13:57 -04002576 int is_orig_bio = 0;
Chris Mason8790d502008-04-03 16:29:03 -04002577
Chris Mason8790d502008-04-03 16:29:03 -04002578 if (err)
Chris Masona236aed2008-04-29 09:38:00 -04002579 atomic_inc(&multi->error);
Chris Mason8790d502008-04-03 16:29:03 -04002580
Chris Mason7d2b4da2008-08-05 10:13:57 -04002581 if (bio == multi->orig_bio)
2582 is_orig_bio = 1;
2583
Chris Masoncea9e442008-04-09 16:28:12 -04002584 if (atomic_dec_and_test(&multi->stripes_pending)) {
Chris Mason7d2b4da2008-08-05 10:13:57 -04002585 if (!is_orig_bio) {
2586 bio_put(bio);
2587 bio = multi->orig_bio;
2588 }
Chris Mason8790d502008-04-03 16:29:03 -04002589 bio->bi_private = multi->private;
2590 bio->bi_end_io = multi->end_io;
Chris Masona236aed2008-04-29 09:38:00 -04002591 /* only send an error to the higher layers if it is
2592 * beyond the tolerance of the multi-bio
2593 */
Chris Mason1259ab72008-05-12 13:39:03 -04002594 if (atomic_read(&multi->error) > multi->max_errors) {
Chris Masona236aed2008-04-29 09:38:00 -04002595 err = -EIO;
Chris Mason1259ab72008-05-12 13:39:03 -04002596 } else if (err) {
2597 /*
2598 * this bio is actually up to date, we didn't
2599 * go over the max number of errors
2600 */
2601 set_bit(BIO_UPTODATE, &bio->bi_flags);
Chris Masona236aed2008-04-29 09:38:00 -04002602 err = 0;
Chris Mason1259ab72008-05-12 13:39:03 -04002603 }
Chris Mason8790d502008-04-03 16:29:03 -04002604 kfree(multi);
2605
2606 bio_endio(bio, err);
Chris Mason7d2b4da2008-08-05 10:13:57 -04002607 } else if (!is_orig_bio) {
Chris Mason8790d502008-04-03 16:29:03 -04002608 bio_put(bio);
2609 }
Chris Mason8790d502008-04-03 16:29:03 -04002610}
2611
Chris Mason8b712842008-06-11 16:50:36 -04002612struct async_sched {
2613 struct bio *bio;
2614 int rw;
2615 struct btrfs_fs_info *info;
2616 struct btrfs_work work;
2617};
2618
2619/*
2620 * see run_scheduled_bios for a description of why bios are collected for
2621 * async submit.
2622 *
2623 * This will add one bio to the pending list for a device and make sure
2624 * the work struct is scheduled.
2625 */
Chris Masona1b32a52008-09-05 16:09:51 -04002626static int noinline schedule_bio(struct btrfs_root *root,
2627 struct btrfs_device *device,
2628 int rw, struct bio *bio)
Chris Mason8b712842008-06-11 16:50:36 -04002629{
2630 int should_queue = 1;
2631
2632 /* don't bother with additional async steps for reads, right now */
2633 if (!(rw & (1 << BIO_RW))) {
Chris Mason492bb6de2008-07-31 16:29:02 -04002634 bio_get(bio);
Chris Mason8b712842008-06-11 16:50:36 -04002635 submit_bio(rw, bio);
Chris Mason492bb6de2008-07-31 16:29:02 -04002636 bio_put(bio);
Chris Mason8b712842008-06-11 16:50:36 -04002637 return 0;
2638 }
2639
2640 /*
Chris Mason0986fe9e2008-08-15 15:34:15 -04002641 * nr_async_bios allows us to reliably return congestion to the
Chris Mason8b712842008-06-11 16:50:36 -04002642 * higher layers. Otherwise, the async bio makes it appear we have
2643 * made progress against dirty pages when we've really just put it
2644 * on a queue for later
2645 */
Chris Mason0986fe9e2008-08-15 15:34:15 -04002646 atomic_inc(&root->fs_info->nr_async_bios);
Chris Mason492bb6de2008-07-31 16:29:02 -04002647 WARN_ON(bio->bi_next);
Chris Mason8b712842008-06-11 16:50:36 -04002648 bio->bi_next = NULL;
2649 bio->bi_rw |= rw;
2650
2651 spin_lock(&device->io_lock);
2652
2653 if (device->pending_bio_tail)
2654 device->pending_bio_tail->bi_next = bio;
2655
2656 device->pending_bio_tail = bio;
2657 if (!device->pending_bios)
2658 device->pending_bios = bio;
2659 if (device->running_pending)
2660 should_queue = 0;
2661
2662 spin_unlock(&device->io_lock);
2663
2664 if (should_queue)
Chris Mason1cc127b2008-06-12 14:46:17 -04002665 btrfs_queue_worker(&root->fs_info->submit_workers,
2666 &device->work);
Chris Mason8b712842008-06-11 16:50:36 -04002667 return 0;
2668}
2669
Chris Masonf1885912008-04-09 16:28:12 -04002670int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
Chris Mason8b712842008-06-11 16:50:36 -04002671 int mirror_num, int async_submit)
Chris Mason0b86a832008-03-24 15:01:56 -04002672{
2673 struct btrfs_mapping_tree *map_tree;
2674 struct btrfs_device *dev;
Chris Mason8790d502008-04-03 16:29:03 -04002675 struct bio *first_bio = bio;
Chris Masona62b9402008-10-03 16:31:08 -04002676 u64 logical = (u64)bio->bi_sector << 9;
Chris Mason0b86a832008-03-24 15:01:56 -04002677 u64 length = 0;
2678 u64 map_length;
Chris Masoncea9e442008-04-09 16:28:12 -04002679 struct btrfs_multi_bio *multi = NULL;
Chris Mason0b86a832008-03-24 15:01:56 -04002680 int ret;
Chris Mason8790d502008-04-03 16:29:03 -04002681 int dev_nr = 0;
2682 int total_devs = 1;
Chris Mason0b86a832008-03-24 15:01:56 -04002683
Chris Masonf2d8d742008-04-21 10:03:05 -04002684 length = bio->bi_size;
Chris Mason0b86a832008-03-24 15:01:56 -04002685 map_tree = &root->fs_info->mapping_tree;
2686 map_length = length;
Chris Masoncea9e442008-04-09 16:28:12 -04002687
Chris Masonf1885912008-04-09 16:28:12 -04002688 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
2689 mirror_num);
Chris Masoncea9e442008-04-09 16:28:12 -04002690 BUG_ON(ret);
2691
2692 total_devs = multi->num_stripes;
2693 if (map_length < length) {
2694 printk("mapping failed logical %Lu bio len %Lu "
2695 "len %Lu\n", logical, length, map_length);
2696 BUG();
2697 }
2698 multi->end_io = first_bio->bi_end_io;
2699 multi->private = first_bio->bi_private;
Chris Mason7d2b4da2008-08-05 10:13:57 -04002700 multi->orig_bio = first_bio;
Chris Masoncea9e442008-04-09 16:28:12 -04002701 atomic_set(&multi->stripes_pending, multi->num_stripes);
2702
Chris Mason8790d502008-04-03 16:29:03 -04002703 while(dev_nr < total_devs) {
Chris Mason8790d502008-04-03 16:29:03 -04002704 if (total_devs > 1) {
Chris Mason8790d502008-04-03 16:29:03 -04002705 if (dev_nr < total_devs - 1) {
2706 bio = bio_clone(first_bio, GFP_NOFS);
2707 BUG_ON(!bio);
2708 } else {
2709 bio = first_bio;
2710 }
2711 bio->bi_private = multi;
2712 bio->bi_end_io = end_bio_multi_stripe;
2713 }
Chris Masoncea9e442008-04-09 16:28:12 -04002714 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
2715 dev = multi->stripes[dev_nr].dev;
Yan Zheng2b820322008-11-17 21:11:30 -05002716 BUG_ON(rw == WRITE && !dev->writeable);
Chris Masondfe25022008-05-13 13:46:40 -04002717 if (dev && dev->bdev) {
2718 bio->bi_bdev = dev->bdev;
Chris Mason8b712842008-06-11 16:50:36 -04002719 if (async_submit)
2720 schedule_bio(root, dev, rw, bio);
2721 else
2722 submit_bio(rw, bio);
Chris Masondfe25022008-05-13 13:46:40 -04002723 } else {
2724 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
2725 bio->bi_sector = logical >> 9;
Chris Masondfe25022008-05-13 13:46:40 -04002726 bio_endio(bio, -EIO);
Chris Masondfe25022008-05-13 13:46:40 -04002727 }
Chris Mason8790d502008-04-03 16:29:03 -04002728 dev_nr++;
Chris Mason239b14b2008-03-24 15:02:07 -04002729 }
Chris Masoncea9e442008-04-09 16:28:12 -04002730 if (total_devs == 1)
2731 kfree(multi);
Chris Mason0b86a832008-03-24 15:01:56 -04002732 return 0;
2733}
2734
Chris Masona4437552008-04-18 10:29:38 -04002735struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
Yan Zheng2b820322008-11-17 21:11:30 -05002736 u8 *uuid, u8 *fsid)
Chris Mason0b86a832008-03-24 15:01:56 -04002737{
Yan Zheng2b820322008-11-17 21:11:30 -05002738 struct btrfs_device *device;
2739 struct btrfs_fs_devices *cur_devices;
Chris Mason0b86a832008-03-24 15:01:56 -04002740
Yan Zheng2b820322008-11-17 21:11:30 -05002741 cur_devices = root->fs_info->fs_devices;
2742 while (cur_devices) {
2743 if (!fsid ||
2744 !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
2745 device = __find_device(&cur_devices->devices,
2746 devid, uuid);
2747 if (device)
2748 return device;
2749 }
2750 cur_devices = cur_devices->seed;
2751 }
2752 return NULL;
Chris Mason0b86a832008-03-24 15:01:56 -04002753}
2754
Chris Masondfe25022008-05-13 13:46:40 -04002755static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2756 u64 devid, u8 *dev_uuid)
2757{
2758 struct btrfs_device *device;
2759 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2760
2761 device = kzalloc(sizeof(*device), GFP_NOFS);
yanhai zhu7cbd8a82008-11-12 14:38:54 -05002762 if (!device)
2763 return NULL;
Chris Masondfe25022008-05-13 13:46:40 -04002764 list_add(&device->dev_list,
2765 &fs_devices->devices);
Chris Masondfe25022008-05-13 13:46:40 -04002766 device->barriers = 1;
2767 device->dev_root = root->fs_info->dev_root;
2768 device->devid = devid;
Chris Mason8b712842008-06-11 16:50:36 -04002769 device->work.func = pending_bios_fn;
Chris Masondfe25022008-05-13 13:46:40 -04002770 fs_devices->num_devices++;
2771 spin_lock_init(&device->io_lock);
2772 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
2773 return device;
2774}
2775
Chris Mason0b86a832008-03-24 15:01:56 -04002776static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2777 struct extent_buffer *leaf,
2778 struct btrfs_chunk *chunk)
2779{
2780 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2781 struct map_lookup *map;
2782 struct extent_map *em;
2783 u64 logical;
2784 u64 length;
2785 u64 devid;
Chris Masona4437552008-04-18 10:29:38 -04002786 u8 uuid[BTRFS_UUID_SIZE];
Chris Mason593060d2008-03-25 16:50:33 -04002787 int num_stripes;
Chris Mason0b86a832008-03-24 15:01:56 -04002788 int ret;
Chris Mason593060d2008-03-25 16:50:33 -04002789 int i;
Chris Mason0b86a832008-03-24 15:01:56 -04002790
Chris Masone17cade2008-04-15 15:41:47 -04002791 logical = key->offset;
2792 length = btrfs_chunk_length(leaf, chunk);
Chris Masona061fc82008-05-07 11:43:44 -04002793
Chris Mason0b86a832008-03-24 15:01:56 -04002794 spin_lock(&map_tree->map_tree.lock);
2795 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
Chris Masonb248a412008-04-14 09:48:18 -04002796 spin_unlock(&map_tree->map_tree.lock);
Chris Mason0b86a832008-03-24 15:01:56 -04002797
2798 /* already mapped? */
2799 if (em && em->start <= logical && em->start + em->len > logical) {
2800 free_extent_map(em);
Chris Mason0b86a832008-03-24 15:01:56 -04002801 return 0;
2802 } else if (em) {
2803 free_extent_map(em);
2804 }
Chris Mason0b86a832008-03-24 15:01:56 -04002805
2806 map = kzalloc(sizeof(*map), GFP_NOFS);
2807 if (!map)
2808 return -ENOMEM;
2809
2810 em = alloc_extent_map(GFP_NOFS);
2811 if (!em)
2812 return -ENOMEM;
Chris Mason593060d2008-03-25 16:50:33 -04002813 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2814 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
Chris Mason0b86a832008-03-24 15:01:56 -04002815 if (!map) {
2816 free_extent_map(em);
2817 return -ENOMEM;
2818 }
2819
2820 em->bdev = (struct block_device *)map;
2821 em->start = logical;
2822 em->len = length;
2823 em->block_start = 0;
Chris Masonc8b97812008-10-29 14:49:59 -04002824 em->block_len = em->len;
Chris Mason0b86a832008-03-24 15:01:56 -04002825
Chris Mason593060d2008-03-25 16:50:33 -04002826 map->num_stripes = num_stripes;
2827 map->io_width = btrfs_chunk_io_width(leaf, chunk);
2828 map->io_align = btrfs_chunk_io_align(leaf, chunk);
2829 map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
2830 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
2831 map->type = btrfs_chunk_type(leaf, chunk);
Chris Mason321aecc2008-04-16 10:49:51 -04002832 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
Chris Mason593060d2008-03-25 16:50:33 -04002833 for (i = 0; i < num_stripes; i++) {
2834 map->stripes[i].physical =
2835 btrfs_stripe_offset_nr(leaf, chunk, i);
2836 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
Chris Masona4437552008-04-18 10:29:38 -04002837 read_extent_buffer(leaf, uuid, (unsigned long)
2838 btrfs_stripe_dev_uuid_nr(chunk, i),
2839 BTRFS_UUID_SIZE);
Yan Zheng2b820322008-11-17 21:11:30 -05002840 map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
2841 NULL);
Chris Masondfe25022008-05-13 13:46:40 -04002842 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
Chris Mason593060d2008-03-25 16:50:33 -04002843 kfree(map);
2844 free_extent_map(em);
2845 return -EIO;
2846 }
Chris Masondfe25022008-05-13 13:46:40 -04002847 if (!map->stripes[i].dev) {
2848 map->stripes[i].dev =
2849 add_missing_dev(root, devid, uuid);
2850 if (!map->stripes[i].dev) {
2851 kfree(map);
2852 free_extent_map(em);
2853 return -EIO;
2854 }
2855 }
2856 map->stripes[i].dev->in_fs_metadata = 1;
Chris Mason0b86a832008-03-24 15:01:56 -04002857 }
2858
2859 spin_lock(&map_tree->map_tree.lock);
2860 ret = add_extent_mapping(&map_tree->map_tree, em);
Chris Mason0b86a832008-03-24 15:01:56 -04002861 spin_unlock(&map_tree->map_tree.lock);
Chris Masonb248a412008-04-14 09:48:18 -04002862 BUG_ON(ret);
Chris Mason0b86a832008-03-24 15:01:56 -04002863 free_extent_map(em);
2864
2865 return 0;
2866}
2867
2868static int fill_device_from_item(struct extent_buffer *leaf,
2869 struct btrfs_dev_item *dev_item,
2870 struct btrfs_device *device)
2871{
2872 unsigned long ptr;
Chris Mason0b86a832008-03-24 15:01:56 -04002873
2874 device->devid = btrfs_device_id(leaf, dev_item);
2875 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2876 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2877 device->type = btrfs_device_type(leaf, dev_item);
2878 device->io_align = btrfs_device_io_align(leaf, dev_item);
2879 device->io_width = btrfs_device_io_width(leaf, dev_item);
2880 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
Chris Mason0b86a832008-03-24 15:01:56 -04002881
2882 ptr = (unsigned long)btrfs_device_uuid(dev_item);
Chris Masone17cade2008-04-15 15:41:47 -04002883 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
Chris Mason0b86a832008-03-24 15:01:56 -04002884
Chris Mason0b86a832008-03-24 15:01:56 -04002885 return 0;
2886}
2887
Yan Zheng2b820322008-11-17 21:11:30 -05002888static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
2889{
2890 struct btrfs_fs_devices *fs_devices;
2891 int ret;
2892
2893 mutex_lock(&uuid_mutex);
2894
2895 fs_devices = root->fs_info->fs_devices->seed;
2896 while (fs_devices) {
2897 if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
2898 ret = 0;
2899 goto out;
2900 }
2901 fs_devices = fs_devices->seed;
2902 }
2903
2904 fs_devices = find_fsid(fsid);
2905 if (!fs_devices) {
2906 ret = -ENOENT;
2907 goto out;
2908 }
2909 if (fs_devices->opened) {
2910 ret = -EBUSY;
2911 goto out;
2912 }
2913
Chris Mason15916de2008-11-19 21:17:22 -05002914 ret = __btrfs_open_devices(fs_devices, MS_RDONLY,
2915 root->fs_info->bdev_holder);
Yan Zheng2b820322008-11-17 21:11:30 -05002916 if (ret)
2917 goto out;
2918
2919 if (!fs_devices->seeding) {
2920 __btrfs_close_devices(fs_devices);
2921 ret = -EINVAL;
2922 goto out;
2923 }
2924
2925 fs_devices->seed = root->fs_info->fs_devices->seed;
2926 root->fs_info->fs_devices->seed = fs_devices;
2927 fs_devices->sprouted = 1;
2928out:
2929 mutex_unlock(&uuid_mutex);
2930 return ret;
2931}
2932
Chris Mason0d81ba52008-03-24 15:02:07 -04002933static int read_one_dev(struct btrfs_root *root,
Chris Mason0b86a832008-03-24 15:01:56 -04002934 struct extent_buffer *leaf,
2935 struct btrfs_dev_item *dev_item)
2936{
2937 struct btrfs_device *device;
2938 u64 devid;
2939 int ret;
Yan Zheng2b820322008-11-17 21:11:30 -05002940 int seed_devices = 0;
2941 u8 fs_uuid[BTRFS_UUID_SIZE];
Chris Masona4437552008-04-18 10:29:38 -04002942 u8 dev_uuid[BTRFS_UUID_SIZE];
2943
Chris Mason0b86a832008-03-24 15:01:56 -04002944 devid = btrfs_device_id(leaf, dev_item);
Chris Masona4437552008-04-18 10:29:38 -04002945 read_extent_buffer(leaf, dev_uuid,
2946 (unsigned long)btrfs_device_uuid(dev_item),
2947 BTRFS_UUID_SIZE);
Yan Zheng2b820322008-11-17 21:11:30 -05002948 read_extent_buffer(leaf, fs_uuid,
2949 (unsigned long)btrfs_device_fsid(dev_item),
2950 BTRFS_UUID_SIZE);
2951
2952 if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
2953 ret = open_seed_devices(root, fs_uuid);
2954 if (ret)
2955 return ret;
2956 seed_devices = 1;
2957 }
2958
2959 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
2960 if (!device || !device->bdev) {
2961 if (!btrfs_test_opt(root, DEGRADED) || seed_devices)
2962 return -EIO;
2963
2964 if (!device) {
2965 printk("warning devid %Lu missing\n", devid);
2966 device = add_missing_dev(root, devid, dev_uuid);
2967 if (!device)
2968 return -ENOMEM;
2969 }
2970 }
2971
2972 if (device->fs_devices != root->fs_info->fs_devices) {
2973 BUG_ON(device->writeable);
2974 if (device->generation !=
2975 btrfs_device_generation(leaf, dev_item))
2976 return -EINVAL;
Chris Mason6324fbf2008-03-24 15:01:59 -04002977 }
Chris Mason0b86a832008-03-24 15:01:56 -04002978
2979 fill_device_from_item(leaf, dev_item, device);
2980 device->dev_root = root->fs_info->dev_root;
Chris Masondfe25022008-05-13 13:46:40 -04002981 device->in_fs_metadata = 1;
Yan Zheng2b820322008-11-17 21:11:30 -05002982 if (device->writeable)
2983 device->fs_devices->total_rw_bytes += device->total_bytes;
Chris Mason0b86a832008-03-24 15:01:56 -04002984 ret = 0;
2985#if 0
2986 ret = btrfs_open_device(device);
2987 if (ret) {
2988 kfree(device);
2989 }
2990#endif
2991 return ret;
2992}
2993
Chris Mason0d81ba52008-03-24 15:02:07 -04002994int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
2995{
2996 struct btrfs_dev_item *dev_item;
2997
2998 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
2999 dev_item);
3000 return read_one_dev(root, buf, dev_item);
3001}
3002
Chris Mason0b86a832008-03-24 15:01:56 -04003003int btrfs_read_sys_array(struct btrfs_root *root)
3004{
3005 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
Chris Masona061fc82008-05-07 11:43:44 -04003006 struct extent_buffer *sb;
Chris Mason0b86a832008-03-24 15:01:56 -04003007 struct btrfs_disk_key *disk_key;
Chris Mason0b86a832008-03-24 15:01:56 -04003008 struct btrfs_chunk *chunk;
Chris Mason84eed902008-04-25 09:04:37 -04003009 u8 *ptr;
3010 unsigned long sb_ptr;
3011 int ret = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04003012 u32 num_stripes;
3013 u32 array_size;
3014 u32 len = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04003015 u32 cur;
Chris Mason84eed902008-04-25 09:04:37 -04003016 struct btrfs_key key;
Chris Mason0b86a832008-03-24 15:01:56 -04003017
Chris Masona061fc82008-05-07 11:43:44 -04003018 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
3019 BTRFS_SUPER_INFO_SIZE);
3020 if (!sb)
3021 return -ENOMEM;
3022 btrfs_set_buffer_uptodate(sb);
3023 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
Chris Mason0b86a832008-03-24 15:01:56 -04003024 array_size = btrfs_super_sys_array_size(super_copy);
3025
Chris Mason0b86a832008-03-24 15:01:56 -04003026 ptr = super_copy->sys_chunk_array;
3027 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
3028 cur = 0;
3029
3030 while (cur < array_size) {
3031 disk_key = (struct btrfs_disk_key *)ptr;
3032 btrfs_disk_key_to_cpu(&key, disk_key);
3033
Chris Masona061fc82008-05-07 11:43:44 -04003034 len = sizeof(*disk_key); ptr += len;
Chris Mason0b86a832008-03-24 15:01:56 -04003035 sb_ptr += len;
3036 cur += len;
3037
Chris Mason0d81ba52008-03-24 15:02:07 -04003038 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
Chris Mason0b86a832008-03-24 15:01:56 -04003039 chunk = (struct btrfs_chunk *)sb_ptr;
Chris Mason0d81ba52008-03-24 15:02:07 -04003040 ret = read_one_chunk(root, &key, sb, chunk);
Chris Mason84eed902008-04-25 09:04:37 -04003041 if (ret)
3042 break;
Chris Mason0b86a832008-03-24 15:01:56 -04003043 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
3044 len = btrfs_chunk_item_size(num_stripes);
3045 } else {
Chris Mason84eed902008-04-25 09:04:37 -04003046 ret = -EIO;
3047 break;
Chris Mason0b86a832008-03-24 15:01:56 -04003048 }
3049 ptr += len;
3050 sb_ptr += len;
3051 cur += len;
3052 }
Chris Masona061fc82008-05-07 11:43:44 -04003053 free_extent_buffer(sb);
Chris Mason84eed902008-04-25 09:04:37 -04003054 return ret;
Chris Mason0b86a832008-03-24 15:01:56 -04003055}
3056
3057int btrfs_read_chunk_tree(struct btrfs_root *root)
3058{
3059 struct btrfs_path *path;
3060 struct extent_buffer *leaf;
3061 struct btrfs_key key;
3062 struct btrfs_key found_key;
3063 int ret;
3064 int slot;
3065
3066 root = root->fs_info->chunk_root;
3067
3068 path = btrfs_alloc_path();
3069 if (!path)
3070 return -ENOMEM;
3071
3072 /* first we search for all of the device items, and then we
3073 * read in all of the chunk items. This way we can create chunk
3074 * mappings that reference all of the devices that are afound
3075 */
3076 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
3077 key.offset = 0;
3078 key.type = 0;
3079again:
3080 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3081 while(1) {
3082 leaf = path->nodes[0];
3083 slot = path->slots[0];
3084 if (slot >= btrfs_header_nritems(leaf)) {
3085 ret = btrfs_next_leaf(root, path);
3086 if (ret == 0)
3087 continue;
3088 if (ret < 0)
3089 goto error;
3090 break;
3091 }
3092 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3093 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
3094 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
3095 break;
3096 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
3097 struct btrfs_dev_item *dev_item;
3098 dev_item = btrfs_item_ptr(leaf, slot,
3099 struct btrfs_dev_item);
Chris Mason0d81ba52008-03-24 15:02:07 -04003100 ret = read_one_dev(root, leaf, dev_item);
Yan Zheng2b820322008-11-17 21:11:30 -05003101 if (ret)
3102 goto error;
Chris Mason0b86a832008-03-24 15:01:56 -04003103 }
3104 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
3105 struct btrfs_chunk *chunk;
3106 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3107 ret = read_one_chunk(root, &found_key, leaf, chunk);
Yan Zheng2b820322008-11-17 21:11:30 -05003108 if (ret)
3109 goto error;
Chris Mason0b86a832008-03-24 15:01:56 -04003110 }
3111 path->slots[0]++;
3112 }
3113 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
3114 key.objectid = 0;
3115 btrfs_release_path(root, path);
3116 goto again;
3117 }
Chris Mason0b86a832008-03-24 15:01:56 -04003118 ret = 0;
3119error:
Yan Zheng2b820322008-11-17 21:11:30 -05003120 btrfs_free_path(path);
Chris Mason0b86a832008-03-24 15:01:56 -04003121 return ret;
3122}