blob: de302702ab3e23b9dc95b0dee56b786d74a3365e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * dm-snapshot.c
3 *
4 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
5 *
6 * This file is released under the GPL.
7 */
8
#include <linux/blkdev.h>
#include <linux/ctype.h>
#include <linux/device-mapper.h>
#include <linux/dm-kcopyd.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022
23#include "dm-snap.h"
24#include "dm-bio-list.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070025
Alasdair G Kergon72d94862006-06-26 00:27:35 -070026#define DM_MSG_PREFIX "snapshots"
27
Linus Torvalds1da177e2005-04-16 15:20:36 -070028/*
29 * The percentage increment we will wake up users at
30 */
31#define WAKE_UP_PERCENT 5
32
33/*
34 * kcopyd priority of snapshot operations
35 */
36#define SNAPSHOT_COPY_PRIORITY 2
37
38/*
Milan Broz8ee27672008-04-24 21:42:36 +010039 * Reserve 1MB for each snapshot initially (with minimum of 1 page).
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 */
Milan Broz8ee27672008-04-24 21:42:36 +010041#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -070042
Mikulas Patockacd45daf2008-07-21 12:00:32 +010043/*
44 * The size of the mempool used to track chunks in use.
45 */
46#define MIN_IOS 256
47
Adrian Bunkc642f9e2006-12-08 02:41:13 -080048static struct workqueue_struct *ksnapd;
David Howellsc4028952006-11-22 14:57:56 +000049static void flush_queued_bios(struct work_struct *work);
Alasdair G Kergonca3a9312006-10-03 01:15:30 -070050
Alasdair G Kergon028867a2007-07-12 17:26:32 +010051struct dm_snap_pending_exception {
52 struct dm_snap_exception e;
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
54 /*
55 * Origin buffers waiting for this to complete are held
56 * in a bio list
57 */
58 struct bio_list origin_bios;
59 struct bio_list snapshot_bios;
60
61 /*
Alasdair G Kergoneccf0812006-03-27 01:17:42 -080062 * Short-term queue of pending exceptions prior to submission.
63 */
64 struct list_head list;
65
66 /*
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -080067 * The primary pending_exception is the one that holds
Alasdair G Kergon4b832e82006-10-03 01:15:30 -070068 * the ref_count and the list of origin_bios for a
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -080069 * group of pending_exceptions. It is always last to get freed.
70 * These fields get set up when writing to the origin.
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 */
Alasdair G Kergon028867a2007-07-12 17:26:32 +010072 struct dm_snap_pending_exception *primary_pe;
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -080073
74 /*
75 * Number of pending_exceptions processing this chunk.
76 * When this drops to zero we must complete the origin bios.
77 * If incrementing or decrementing this, hold pe->snap->lock for
78 * the sibling concerned and not pe->primary_pe->snap->lock unless
79 * they are the same.
80 */
Alasdair G Kergon4b832e82006-10-03 01:15:30 -070081 atomic_t ref_count;
Linus Torvalds1da177e2005-04-16 15:20:36 -070082
83 /* Pointer back to snapshot context */
84 struct dm_snapshot *snap;
85
86 /*
87 * 1 indicates the exception has already been sent to
88 * kcopyd.
89 */
90 int started;
91};
92
93/*
94 * Hash table mapping origin volumes to lists of snapshots and
95 * a lock to protect it
96 */
Christoph Lametere18b8902006-12-06 20:33:20 -080097static struct kmem_cache *exception_cache;
98static struct kmem_cache *pending_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -070099static mempool_t *pending_pool;
100
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100101struct dm_snap_tracked_chunk {
102 struct hlist_node node;
103 chunk_t chunk;
104};
105
106static struct kmem_cache *tracked_chunk_cache;
107
108static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s,
109 chunk_t chunk)
110{
111 struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool,
112 GFP_NOIO);
113 unsigned long flags;
114
115 c->chunk = chunk;
116
117 spin_lock_irqsave(&s->tracked_chunk_lock, flags);
118 hlist_add_head(&c->node,
119 &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
120 spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
121
122 return c;
123}
124
125static void stop_tracking_chunk(struct dm_snapshot *s,
126 struct dm_snap_tracked_chunk *c)
127{
128 unsigned long flags;
129
130 spin_lock_irqsave(&s->tracked_chunk_lock, flags);
131 hlist_del(&c->node);
132 spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
133
134 mempool_free(c, s->tracked_chunk_pool);
135}
136
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137/*
138 * One of these per registered origin, held in the snapshot_origins hash
139 */
140struct origin {
141 /* The origin device */
142 struct block_device *bdev;
143
144 struct list_head hash_list;
145
146 /* List of snapshots for this origin */
147 struct list_head snapshots;
148};
149
150/*
151 * Size of the hash table for origin volumes. If we make this
152 * the size of the minors list then it should be nearly perfect
153 */
154#define ORIGIN_HASH_SIZE 256
155#define ORIGIN_MASK 0xFF
156static struct list_head *_origins;
157static struct rw_semaphore _origins_lock;
158
159static int init_origin_hash(void)
160{
161 int i;
162
163 _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
164 GFP_KERNEL);
165 if (!_origins) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700166 DMERR("unable to allocate memory");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 return -ENOMEM;
168 }
169
170 for (i = 0; i < ORIGIN_HASH_SIZE; i++)
171 INIT_LIST_HEAD(_origins + i);
172 init_rwsem(&_origins_lock);
173
174 return 0;
175}
176
177static void exit_origin_hash(void)
178{
179 kfree(_origins);
180}
181
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100182static unsigned origin_hash(struct block_device *bdev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183{
184 return bdev->bd_dev & ORIGIN_MASK;
185}
186
187static struct origin *__lookup_origin(struct block_device *origin)
188{
189 struct list_head *ol;
190 struct origin *o;
191
192 ol = &_origins[origin_hash(origin)];
193 list_for_each_entry (o, ol, hash_list)
194 if (bdev_equal(o->bdev, origin))
195 return o;
196
197 return NULL;
198}
199
200static void __insert_origin(struct origin *o)
201{
202 struct list_head *sl = &_origins[origin_hash(o->bdev)];
203 list_add_tail(&o->hash_list, sl);
204}
205
206/*
207 * Make a note of the snapshot and its origin so we can look it
208 * up when the origin has a write on it.
209 */
210static int register_snapshot(struct dm_snapshot *snap)
211{
212 struct origin *o;
213 struct block_device *bdev = snap->origin->bdev;
214
215 down_write(&_origins_lock);
216 o = __lookup_origin(bdev);
217
218 if (!o) {
219 /* New origin */
220 o = kmalloc(sizeof(*o), GFP_KERNEL);
221 if (!o) {
222 up_write(&_origins_lock);
223 return -ENOMEM;
224 }
225
226 /* Initialise the struct */
227 INIT_LIST_HEAD(&o->snapshots);
228 o->bdev = bdev;
229
230 __insert_origin(o);
231 }
232
233 list_add_tail(&snap->list, &o->snapshots);
234
235 up_write(&_origins_lock);
236 return 0;
237}
238
239static void unregister_snapshot(struct dm_snapshot *s)
240{
241 struct origin *o;
242
243 down_write(&_origins_lock);
244 o = __lookup_origin(s->origin->bdev);
245
246 list_del(&s->list);
247 if (list_empty(&o->snapshots)) {
248 list_del(&o->hash_list);
249 kfree(o);
250 }
251
252 up_write(&_origins_lock);
253}
254
255/*
256 * Implementation of the exception hash tables.
Milan Brozd74f81f2008-02-08 02:11:27 +0000257 * The lowest hash_shift bits of the chunk number are ignored, allowing
258 * some consecutive chunks to be grouped together.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 */
Milan Brozd74f81f2008-02-08 02:11:27 +0000260static int init_exception_table(struct exception_table *et, uint32_t size,
261 unsigned hash_shift)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262{
263 unsigned int i;
264
Milan Brozd74f81f2008-02-08 02:11:27 +0000265 et->hash_shift = hash_shift;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 et->hash_mask = size - 1;
267 et->table = dm_vcalloc(size, sizeof(struct list_head));
268 if (!et->table)
269 return -ENOMEM;
270
271 for (i = 0; i < size; i++)
272 INIT_LIST_HEAD(et->table + i);
273
274 return 0;
275}
276
Christoph Lametere18b8902006-12-06 20:33:20 -0800277static void exit_exception_table(struct exception_table *et, struct kmem_cache *mem)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278{
279 struct list_head *slot;
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100280 struct dm_snap_exception *ex, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 int i, size;
282
283 size = et->hash_mask + 1;
284 for (i = 0; i < size; i++) {
285 slot = et->table + i;
286
287 list_for_each_entry_safe (ex, next, slot, hash_list)
288 kmem_cache_free(mem, ex);
289 }
290
291 vfree(et->table);
292}
293
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100294static uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295{
Milan Brozd74f81f2008-02-08 02:11:27 +0000296 return (chunk >> et->hash_shift) & et->hash_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297}
298
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100299static void insert_exception(struct exception_table *eh,
300 struct dm_snap_exception *e)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301{
302 struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
303 list_add(&e->hash_list, l);
304}
305
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100306static void remove_exception(struct dm_snap_exception *e)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307{
308 list_del(&e->hash_list);
309}
310
311/*
312 * Return the exception data for a sector, or NULL if not
313 * remapped.
314 */
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100315static struct dm_snap_exception *lookup_exception(struct exception_table *et,
316 chunk_t chunk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317{
318 struct list_head *slot;
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100319 struct dm_snap_exception *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320
321 slot = &et->table[exception_hash(et, chunk)];
322 list_for_each_entry (e, slot, hash_list)
Milan Brozd74f81f2008-02-08 02:11:27 +0000323 if (chunk >= e->old_chunk &&
324 chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 return e;
326
327 return NULL;
328}
329
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100330static struct dm_snap_exception *alloc_exception(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331{
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100332 struct dm_snap_exception *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333
334 e = kmem_cache_alloc(exception_cache, GFP_NOIO);
335 if (!e)
336 e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
337
338 return e;
339}
340
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100341static void free_exception(struct dm_snap_exception *e)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342{
343 kmem_cache_free(exception_cache, e);
344}
345
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100346static struct dm_snap_pending_exception *alloc_pending_exception(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347{
348 return mempool_alloc(pending_pool, GFP_NOIO);
349}
350
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100351static void free_pending_exception(struct dm_snap_pending_exception *pe)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352{
353 mempool_free(pe, pending_pool);
354}
355
Milan Brozd74f81f2008-02-08 02:11:27 +0000356static void insert_completed_exception(struct dm_snapshot *s,
357 struct dm_snap_exception *new_e)
358{
359 struct exception_table *eh = &s->complete;
360 struct list_head *l;
361 struct dm_snap_exception *e = NULL;
362
363 l = &eh->table[exception_hash(eh, new_e->old_chunk)];
364
365 /* Add immediately if this table doesn't support consecutive chunks */
366 if (!eh->hash_shift)
367 goto out;
368
369 /* List is ordered by old_chunk */
370 list_for_each_entry_reverse(e, l, hash_list) {
371 /* Insert after an existing chunk? */
372 if (new_e->old_chunk == (e->old_chunk +
373 dm_consecutive_chunk_count(e) + 1) &&
374 new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
375 dm_consecutive_chunk_count(e) + 1)) {
376 dm_consecutive_chunk_count_inc(e);
377 free_exception(new_e);
378 return;
379 }
380
381 /* Insert before an existing chunk? */
382 if (new_e->old_chunk == (e->old_chunk - 1) &&
383 new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
384 dm_consecutive_chunk_count_inc(e);
385 e->old_chunk--;
386 e->new_chunk--;
387 free_exception(new_e);
388 return;
389 }
390
391 if (new_e->old_chunk > e->old_chunk)
392 break;
393 }
394
395out:
396 list_add(&new_e->hash_list, e ? &e->hash_list : l);
397}
398
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
400{
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100401 struct dm_snap_exception *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402
403 e = alloc_exception();
404 if (!e)
405 return -ENOMEM;
406
407 e->old_chunk = old;
Milan Brozd74f81f2008-02-08 02:11:27 +0000408
409 /* Consecutive_count is implicitly initialised to zero */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 e->new_chunk = new;
Milan Brozd74f81f2008-02-08 02:11:27 +0000411
412 insert_completed_exception(s, e);
413
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 return 0;
415}
416
417/*
418 * Hard coded magic.
419 */
420static int calc_max_buckets(void)
421{
422 /* use a fixed size of 2MB */
423 unsigned long mem = 2 * 1024 * 1024;
424 mem /= sizeof(struct list_head);
425
426 return mem;
427}
428
429/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 * Allocate room for a suitable hash table.
431 */
432static int init_hash_tables(struct dm_snapshot *s)
433{
434 sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
435
436 /*
437 * Calculate based on the size of the original volume or
438 * the COW volume...
439 */
440 cow_dev_size = get_dev_size(s->cow->bdev);
441 origin_dev_size = get_dev_size(s->origin->bdev);
442 max_buckets = calc_max_buckets();
443
444 hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
445 hash_size = min(hash_size, max_buckets);
446
Robert P. J. Day8defd832008-02-08 02:10:06 +0000447 hash_size = rounddown_pow_of_two(hash_size);
Milan Brozd74f81f2008-02-08 02:11:27 +0000448 if (init_exception_table(&s->complete, hash_size,
449 DM_CHUNK_CONSECUTIVE_BITS))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 return -ENOMEM;
451
452 /*
453 * Allocate hash table for in-flight exceptions
454 * Make this smaller than the real hash table
455 */
456 hash_size >>= 3;
457 if (hash_size < 64)
458 hash_size = 64;
459
Milan Brozd74f81f2008-02-08 02:11:27 +0000460 if (init_exception_table(&s->pending, hash_size, 0)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 exit_exception_table(&s->complete, exception_cache);
462 return -ENOMEM;
463 }
464
465 return 0;
466}
467
/*
 * Round 'n' up to the next multiple of 'size'.  'size' must be a power
 * of 2; values already on a boundary are returned unchanged.
 */
static ulong round_up(ulong n, ulong size)
{
	const ulong low_bits = size - 1;

	return (n + low_bits) & ~low_bits;
}
477
Mark McLoughlin4c7e3bf2006-10-03 01:15:25 -0700478static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
479 char **error)
480{
481 unsigned long chunk_size;
482 char *value;
483
484 chunk_size = simple_strtoul(chunk_size_arg, &value, 10);
485 if (*chunk_size_arg == '\0' || *value != '\0') {
486 *error = "Invalid chunk size";
487 return -EINVAL;
488 }
489
490 if (!chunk_size) {
491 s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
492 return 0;
493 }
494
495 /*
496 * Chunk size must be multiple of page size. Silently
497 * round up if it's not.
498 */
499 chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
500
501 /* Check chunk_size is a power of 2 */
vignesh babu6f3c3f02007-10-19 22:38:44 +0100502 if (!is_power_of_2(chunk_size)) {
Mark McLoughlin4c7e3bf2006-10-03 01:15:25 -0700503 *error = "Chunk size is not a power of 2";
504 return -EINVAL;
505 }
506
507 /* Validate the chunk size against the device block size */
508 if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
509 *error = "Chunk size is not a multiple of device blocksize";
510 return -EINVAL;
511 }
512
513 s->chunk_size = chunk_size;
514 s->chunk_mask = chunk_size - 1;
515 s->chunk_shift = ffs(chunk_size) - 1;
516
517 return 0;
518}
519
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520/*
521 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
522 */
523static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
524{
525 struct dm_snapshot *s;
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100526 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 int r = -EINVAL;
528 char persistent;
529 char *origin_path;
530 char *cow_path;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531
Mark McLoughlin4c7e3bf2006-10-03 01:15:25 -0700532 if (argc != 4) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700533 ti->error = "requires exactly 4 arguments";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 r = -EINVAL;
535 goto bad1;
536 }
537
538 origin_path = argv[0];
539 cow_path = argv[1];
540 persistent = toupper(*argv[2]);
541
542 if (persistent != 'P' && persistent != 'N') {
543 ti->error = "Persistent flag is not P or N";
544 r = -EINVAL;
545 goto bad1;
546 }
547
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 s = kmalloc(sizeof(*s), GFP_KERNEL);
549 if (s == NULL) {
550 ti->error = "Cannot allocate snapshot context private "
551 "structure";
552 r = -ENOMEM;
553 goto bad1;
554 }
555
556 r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
557 if (r) {
558 ti->error = "Cannot get origin device";
559 goto bad2;
560 }
561
562 r = dm_get_device(ti, cow_path, 0, 0,
563 FMODE_READ | FMODE_WRITE, &s->cow);
564 if (r) {
565 dm_put_device(ti, s->origin);
566 ti->error = "Cannot get COW device";
567 goto bad2;
568 }
569
Mark McLoughlin4c7e3bf2006-10-03 01:15:25 -0700570 r = set_chunk_size(s, argv[3], &ti->error);
571 if (r)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 goto bad3;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 s->type = persistent;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575
576 s->valid = 1;
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -0800577 s->active = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 s->last_percent = 0;
579 init_rwsem(&s->lock);
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700580 spin_lock_init(&s->pe_lock);
Mikulas Patocka72727ba2008-04-24 21:43:11 +0100581 s->ti = ti;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582
583 /* Allocate hash table for COW data */
584 if (init_hash_tables(s)) {
585 ti->error = "Unable to allocate hash table space";
586 r = -ENOMEM;
587 goto bad3;
588 }
589
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 s->store.snap = s;
591
592 if (persistent == 'P')
Mark McLoughlin4c7e3bf2006-10-03 01:15:25 -0700593 r = dm_create_persistent(&s->store);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 else
Mark McLoughlin4c7e3bf2006-10-03 01:15:25 -0700595 r = dm_create_transient(&s->store);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596
597 if (r) {
598 ti->error = "Couldn't create exception store";
599 r = -EINVAL;
600 goto bad4;
601 }
602
Heinz Mauelshageneb69aca2008-04-24 21:43:19 +0100603 r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 if (r) {
605 ti->error = "Could not create kcopyd client";
606 goto bad5;
607 }
608
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100609 s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
610 tracked_chunk_cache);
611 if (!s->tracked_chunk_pool) {
612 ti->error = "Could not allocate tracked_chunk mempool for "
613 "tracking reads";
614 goto bad6;
615 }
616
617 for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
618 INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
619
620 spin_lock_init(&s->tracked_chunk_lock);
621
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -0800622 /* Metadata must only be loaded into one table at once */
Mark McLoughlinf9cea4f2006-10-03 01:15:25 -0700623 r = s->store.read_metadata(&s->store);
Milan Broz07641472007-07-12 17:28:13 +0100624 if (r < 0) {
Mark McLoughlinf9cea4f2006-10-03 01:15:25 -0700625 ti->error = "Failed to read snapshot metadata";
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100626 goto bad_load_and_register;
Milan Broz07641472007-07-12 17:28:13 +0100627 } else if (r > 0) {
628 s->valid = 0;
629 DMWARN("Snapshot is marked invalid.");
Mark McLoughlinf9cea4f2006-10-03 01:15:25 -0700630 }
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -0800631
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700632 bio_list_init(&s->queued_bios);
David Howellsc4028952006-11-22 14:57:56 +0000633 INIT_WORK(&s->queued_bios_work, flush_queued_bios);
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700634
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 /* Add snapshot to the list of snapshots for this origin */
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -0800636 /* Exceptions aren't triggered till snapshot_resume() is called */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 if (register_snapshot(s)) {
638 r = -EINVAL;
639 ti->error = "Cannot register snapshot origin";
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100640 goto bad_load_and_register;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 }
642
643 ti->private = s;
Alasdair G Kergonc51c2752006-06-26 00:27:18 -0700644 ti->split_io = s->chunk_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
646 return 0;
647
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100648 bad_load_and_register:
649 mempool_destroy(s->tracked_chunk_pool);
650
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 bad6:
Heinz Mauelshageneb69aca2008-04-24 21:43:19 +0100652 dm_kcopyd_client_destroy(s->kcopyd_client);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653
654 bad5:
655 s->store.destroy(&s->store);
656
657 bad4:
658 exit_exception_table(&s->pending, pending_cache);
659 exit_exception_table(&s->complete, exception_cache);
660
661 bad3:
662 dm_put_device(ti, s->cow);
663 dm_put_device(ti, s->origin);
664
665 bad2:
666 kfree(s);
667
668 bad1:
669 return r;
670}
671
Milan Broz31c93a02006-12-08 02:41:11 -0800672static void __free_exceptions(struct dm_snapshot *s)
673{
Heinz Mauelshageneb69aca2008-04-24 21:43:19 +0100674 dm_kcopyd_client_destroy(s->kcopyd_client);
Milan Broz31c93a02006-12-08 02:41:11 -0800675 s->kcopyd_client = NULL;
676
677 exit_exception_table(&s->pending, pending_cache);
678 exit_exception_table(&s->complete, exception_cache);
679
680 s->store.destroy(&s->store);
681}
682
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683static void snapshot_dtr(struct dm_target *ti)
684{
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100685#ifdef CONFIG_DM_DEBUG
686 int i;
687#endif
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100688 struct dm_snapshot *s = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700690 flush_workqueue(ksnapd);
691
Alasdair G Kergon138728d2006-03-27 01:17:50 -0800692 /* Prevent further origin writes from using this snapshot. */
693 /* After this returns there can be no new kcopyd jobs. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 unregister_snapshot(s);
695
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100696#ifdef CONFIG_DM_DEBUG
697 for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
698 BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
699#endif
700
701 mempool_destroy(s->tracked_chunk_pool);
702
Milan Broz31c93a02006-12-08 02:41:11 -0800703 __free_exceptions(s);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704
705 dm_put_device(ti, s->origin);
706 dm_put_device(ti, s->cow);
Alasdair G Kergon138728d2006-03-27 01:17:50 -0800707
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 kfree(s);
709}
710
711/*
712 * Flush a list of buffers.
713 */
714static void flush_bios(struct bio *bio)
715{
716 struct bio *n;
717
718 while (bio) {
719 n = bio->bi_next;
720 bio->bi_next = NULL;
721 generic_make_request(bio);
722 bio = n;
723 }
724}
725
David Howellsc4028952006-11-22 14:57:56 +0000726static void flush_queued_bios(struct work_struct *work)
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700727{
David Howellsc4028952006-11-22 14:57:56 +0000728 struct dm_snapshot *s =
729 container_of(work, struct dm_snapshot, queued_bios_work);
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700730 struct bio *queued_bios;
731 unsigned long flags;
732
733 spin_lock_irqsave(&s->pe_lock, flags);
734 queued_bios = bio_list_get(&s->queued_bios);
735 spin_unlock_irqrestore(&s->pe_lock, flags);
736
737 flush_bios(queued_bios);
738}
739
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740/*
741 * Error a list of buffers.
742 */
743static void error_bios(struct bio *bio)
744{
745 struct bio *n;
746
747 while (bio) {
748 n = bio->bi_next;
749 bio->bi_next = NULL;
NeilBrown6712ecf2007-09-27 12:47:43 +0200750 bio_io_error(bio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 bio = n;
752 }
753}
754
Alasdair G Kergon695368a2006-10-03 01:15:31 -0700755static void __invalidate_snapshot(struct dm_snapshot *s, int err)
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800756{
757 if (!s->valid)
758 return;
759
760 if (err == -EIO)
761 DMERR("Invalidating snapshot: Error reading/writing.");
762 else if (err == -ENOMEM)
763 DMERR("Invalidating snapshot: Unable to allocate exception.");
764
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800765 if (s->store.drop_snapshot)
766 s->store.drop_snapshot(&s->store);
767
768 s->valid = 0;
769
Mikulas Patocka72727ba2008-04-24 21:43:11 +0100770 dm_table_event(s->ti->table);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800771}
772
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100773static void get_pending_exception(struct dm_snap_pending_exception *pe)
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700774{
775 atomic_inc(&pe->ref_count);
776}
777
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100778static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700779{
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100780 struct dm_snap_pending_exception *primary_pe;
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700781 struct bio *origin_bios = NULL;
782
783 primary_pe = pe->primary_pe;
784
785 /*
786 * If this pe is involved in a write to the origin and
787 * it is the last sibling to complete then release
788 * the bios for the original write to the origin.
789 */
790 if (primary_pe &&
791 atomic_dec_and_test(&primary_pe->ref_count))
792 origin_bios = bio_list_get(&primary_pe->origin_bios);
793
794 /*
795 * Free the pe if it's not linked to an origin write or if
796 * it's not itself a primary pe.
797 */
798 if (!primary_pe || primary_pe != pe)
799 free_pending_exception(pe);
800
801 /*
802 * Free the primary pe if nothing references it.
803 */
804 if (primary_pe && !atomic_read(&primary_pe->ref_count))
805 free_pending_exception(primary_pe);
806
807 return origin_bios;
808}
809
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100810static void pending_complete(struct dm_snap_pending_exception *pe, int success)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811{
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100812 struct dm_snap_exception *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 struct dm_snapshot *s = pe->snap;
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700814 struct bio *origin_bios = NULL;
815 struct bio *snapshot_bios = NULL;
816 int error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800818 if (!success) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819 /* Read/write error - snapshot is unusable */
820 down_write(&s->lock);
Alasdair G Kergon695368a2006-10-03 01:15:31 -0700821 __invalidate_snapshot(s, -EIO);
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700822 error = 1;
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800823 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 }
825
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800826 e = alloc_exception();
827 if (!e) {
828 down_write(&s->lock);
Alasdair G Kergon695368a2006-10-03 01:15:31 -0700829 __invalidate_snapshot(s, -ENOMEM);
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700830 error = 1;
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800831 goto out;
832 }
833 *e = pe->e;
834
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700835 down_write(&s->lock);
836 if (!s->valid) {
837 free_exception(e);
838 error = 1;
839 goto out;
840 }
841
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800842 /*
843 * Add a proper exception, and remove the
844 * in-flight exception from the list.
845 */
Milan Brozd74f81f2008-02-08 02:11:27 +0000846 insert_completed_exception(s, e);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800847
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 out:
Alasdair G Kergon695368a2006-10-03 01:15:31 -0700849 remove_exception(&pe->e);
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700850 snapshot_bios = bio_list_get(&pe->snapshot_bios);
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700851 origin_bios = put_pending_exception(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700853 up_write(&s->lock);
854
855 /* Submit any pending write bios */
856 if (error)
857 error_bios(snapshot_bios);
858 else
859 flush_bios(snapshot_bios);
860
861 flush_bios(origin_bios);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862}
863
/* Commit-completion hook: 'context' is the pending exception. */
static void commit_callback(void *context, int success)
{
	pending_complete((struct dm_snap_pending_exception *) context,
			 success);
}
870
871/*
872 * Called when the copy I/O has finished. kcopyd actually runs
873 * this code so don't block.
874 */
Alasdair G Kergon4cdc1d12008-03-28 14:16:10 -0700875static void copy_callback(int read_err, unsigned long write_err, void *context)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876{
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100877 struct dm_snap_pending_exception *pe = context;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 struct dm_snapshot *s = pe->snap;
879
880 if (read_err || write_err)
881 pending_complete(pe, 0);
882
883 else
884 /* Update the metadata if we are persistent */
885 s->store.commit_exception(&s->store, &pe->e, commit_callback,
886 pe);
887}
888
889/*
890 * Dispatches the copy operation to kcopyd.
891 */
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100892static void start_copy(struct dm_snap_pending_exception *pe)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893{
894 struct dm_snapshot *s = pe->snap;
Heinz Mauelshagen22a1ceb2008-04-24 21:43:17 +0100895 struct dm_io_region src, dest;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 struct block_device *bdev = s->origin->bdev;
897 sector_t dev_size;
898
899 dev_size = get_dev_size(bdev);
900
901 src.bdev = bdev;
902 src.sector = chunk_to_sector(s, pe->e.old_chunk);
903 src.count = min(s->chunk_size, dev_size - src.sector);
904
905 dest.bdev = s->cow->bdev;
906 dest.sector = chunk_to_sector(s, pe->e.new_chunk);
907 dest.count = src.count;
908
909 /* Hand over to kcopyd */
Heinz Mauelshageneb69aca2008-04-24 21:43:19 +0100910 dm_kcopyd_copy(s->kcopyd_client,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 &src, 1, &dest, 0, copy_callback, pe);
912}
913
/*
 * Looks to see if this snapshot already has a pending exception
 * for this chunk, otherwise it allocates a new one and inserts
 * it into the pending table.
 *
 * Returns the pending exception for the chunk containing bio->bi_sector,
 * or NULL if the snapshot turns out to be invalid once the lock has been
 * retaken, or if the store's prepare_exception() reports failure.
 *
 * NOTE: a write lock must be held on snap->lock before calling
 * this.  The lock is released and re-acquired internally around the
 * allocation of a new pending exception.
 *
 * NOTE(review): because the lock is dropped, anything the caller checked
 * beforehand (e.g. its lookup in the completed table) may be stale by the
 * time this returns; only the pending table is re-checked here.  Confirm
 * whether that window matters.
 */
static struct dm_snap_pending_exception *
__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
{
	struct dm_snap_exception *e;
	struct dm_snap_pending_exception *pe;
	chunk_t chunk = sector_to_chunk(s, bio->bi_sector);

	/*
	 * Is there a pending exception for this already ?
	 */
	e = lookup_exception(&s->pending, chunk);
	if (e) {
		/* cast the exception to a pending exception */
		pe = container_of(e, struct dm_snap_pending_exception, e);
		goto out;
	}

	/*
	 * Create a new pending exception, we don't want
	 * to hold the lock while we do this.
	 */
	up_write(&s->lock);
	pe = alloc_pending_exception();
	down_write(&s->lock);

	/* The snapshot may have been invalidated while the lock was dropped. */
	if (!s->valid) {
		free_pending_exception(pe);
		return NULL;
	}

	/*
	 * Look again: a pending exception for this chunk may have been
	 * inserted while the lock was dropped.  If so, use it and discard
	 * the one just allocated.
	 */
	e = lookup_exception(&s->pending, chunk);
	if (e) {
		free_pending_exception(pe);
		pe = container_of(e, struct dm_snap_pending_exception, e);
		goto out;
	}

	/* Initialise the freshly allocated pending exception. */
	pe->e.old_chunk = chunk;
	bio_list_init(&pe->origin_bios);
	bio_list_init(&pe->snapshot_bios);
	pe->primary_pe = NULL;
	atomic_set(&pe->ref_count, 0);
	pe->snap = s;
	pe->started = 0;

	/* Let the exception store prepare the exception; non-zero is failure. */
	if (s->store.prepare_exception(&s->store, &pe->e)) {
		free_pending_exception(pe);
		return NULL;
	}

	/* Take a reference before publishing it in the pending table. */
	get_pending_exception(pe);
	insert_exception(&s->pending, &pe->e);

 out:
	return pe;
}
978
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100979static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
Milan Brozd74f81f2008-02-08 02:11:27 +0000980 struct bio *bio, chunk_t chunk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981{
982 bio->bi_bdev = s->cow->bdev;
Milan Brozd74f81f2008-02-08 02:11:27 +0000983 bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) +
984 (chunk - e->old_chunk)) +
985 (bio->bi_sector & s->chunk_mask);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986}
987
/*
 * Map a bio submitted to the snapshot device.
 *
 * Returns -EIO if the snapshot is invalid (or has to be invalidated
 * because no pending exception could be obtained), DM_MAPIO_SUBMITTED
 * when a write has been queued on a pending exception, and
 * DM_MAPIO_REMAPPED otherwise (the bio then points either at the COW
 * device, for an already completed exception, or at the origin device).
 */
static int snapshot_map(struct dm_target *ti, struct bio *bio,
			union map_info *map_context)
{
	struct dm_snap_exception *e;
	struct dm_snapshot *s = ti->private;
	int r = DM_MAPIO_REMAPPED;
	chunk_t chunk;
	struct dm_snap_pending_exception *pe = NULL;

	chunk = sector_to_chunk(s, bio->bi_sector);

	/* Full snapshots are not usable */
	/* To get here the table must be live so s->active is always set. */
	if (!s->valid)
		return -EIO;

	/* FIXME: should only take write lock if we need
	 * to copy an exception */
	down_write(&s->lock);

	/* Check validity again now that the lock is held. */
	if (!s->valid) {
		r = -EIO;
		goto out_unlock;
	}

	/* If the block is already remapped - use that, else remap it */
	e = lookup_exception(&s->complete, chunk);
	if (e) {
		remap_exception(s, e, bio, chunk);
		goto out_unlock;
	}

	/*
	 * Write to snapshot - higher level takes care of RW/RO
	 * flags so we should only get this if we are
	 * writeable.
	 */
	if (bio_rw(bio) == WRITE) {
		pe = __find_pending_exception(s, bio);
		if (!pe) {
			__invalidate_snapshot(s, -ENOMEM);
			r = -EIO;
			goto out_unlock;
		}

		/* Point the bio at the COW device and park it on the pe. */
		remap_exception(s, &pe->e, bio, chunk);
		bio_list_add(&pe->snapshot_bios, bio);

		r = DM_MAPIO_SUBMITTED;

		if (!pe->started) {
			/* this is protected by snap->lock */
			pe->started = 1;
			/* Drop the lock before handing the copy to kcopyd. */
			up_write(&s->lock);
			start_copy(pe);
			goto out;
		}
	} else {
		/* Read of a chunk with no exception: send it to the origin. */
		bio->bi_bdev = s->origin->bdev;
		/* Remember the tracking handle so snapshot_end_io() can drop it. */
		map_context->ptr = track_chunk(s, chunk);
	}

 out_unlock:
	up_write(&s->lock);
 out:
	return r;
}
1055
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001056static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
1057 int error, union map_info *map_context)
1058{
1059 struct dm_snapshot *s = ti->private;
1060 struct dm_snap_tracked_chunk *c = map_context->ptr;
1061
1062 if (c)
1063 stop_tracking_chunk(s, c);
1064
1065 return 0;
1066}
1067
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068static void snapshot_resume(struct dm_target *ti)
1069{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001070 struct dm_snapshot *s = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -08001072 down_write(&s->lock);
1073 s->active = 1;
1074 up_write(&s->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075}
1076
1077static int snapshot_status(struct dm_target *ti, status_type_t type,
1078 char *result, unsigned int maxlen)
1079{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001080 struct dm_snapshot *snap = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081
1082 switch (type) {
1083 case STATUSTYPE_INFO:
1084 if (!snap->valid)
1085 snprintf(result, maxlen, "Invalid");
1086 else {
1087 if (snap->store.fraction_full) {
1088 sector_t numerator, denominator;
1089 snap->store.fraction_full(&snap->store,
1090 &numerator,
1091 &denominator);
Andrew Morton4ee218c2006-03-27 01:17:48 -08001092 snprintf(result, maxlen, "%llu/%llu",
1093 (unsigned long long)numerator,
1094 (unsigned long long)denominator);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 }
1096 else
1097 snprintf(result, maxlen, "Unknown");
1098 }
1099 break;
1100
1101 case STATUSTYPE_TABLE:
1102 /*
1103 * kdevname returns a static pointer so we need
1104 * to make private copies if the output is to
1105 * make sense.
1106 */
Andrew Morton4ee218c2006-03-27 01:17:48 -08001107 snprintf(result, maxlen, "%s %s %c %llu",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108 snap->origin->name, snap->cow->name,
Andrew Morton4ee218c2006-03-27 01:17:48 -08001109 snap->type,
1110 (unsigned long long)snap->chunk_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111 break;
1112 }
1113
1114 return 0;
1115}
1116
1117/*-----------------------------------------------------------------
1118 * Origin methods
1119 *---------------------------------------------------------------*/
/*
 * Handle a write to the origin for every snapshot on @snapshots.
 *
 * For each valid, active snapshot that covers bio->bi_sector and has no
 * completed exception for the affected chunk, a pending exception is
 * found or created.  The bio is queued on the origin_bios list of the
 * primary pending exception, and every pending exception that has not
 * been started yet is collected and passed to start_copy() once the
 * loop is done.
 *
 * Returns DM_MAPIO_SUBMITTED if the bio was queued on a pending
 * exception, DM_MAPIO_REMAPPED if no snapshot needed a copy.
 */
static int __origin_write(struct list_head *snapshots, struct bio *bio)
{
	int r = DM_MAPIO_REMAPPED, first = 0;
	struct dm_snapshot *snap;
	struct dm_snap_exception *e;
	struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
	chunk_t chunk;
	LIST_HEAD(pe_queue);

	/* Do all the snapshots on this origin */
	list_for_each_entry (snap, snapshots, list) {

		down_write(&snap->lock);

		/* Only deal with valid and active snapshots */
		if (!snap->valid || !snap->active)
			goto next_snapshot;

		/* Nothing to do if writing beyond end of snapshot */
		if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
			goto next_snapshot;

		/*
		 * Remember, different snapshots can have
		 * different chunk sizes.
		 */
		chunk = sector_to_chunk(snap, bio->bi_sector);

		/*
		 * Check exception table to see if block
		 * is already remapped in this snapshot
		 * and trigger an exception if not.
		 *
		 * ref_count is initialised to 1 so pending_complete()
		 * won't destroy the primary_pe while we're inside this loop.
		 */
		e = lookup_exception(&snap->complete, chunk);
		if (e)
			goto next_snapshot;

		pe = __find_pending_exception(snap, bio);
		if (!pe) {
			/* Only this snapshot is invalidated; carry on with the rest. */
			__invalidate_snapshot(snap, -ENOMEM);
			goto next_snapshot;
		}

		if (!primary_pe) {
			/*
			 * Either every pe here has same
			 * primary_pe or none has one yet.
			 */
			if (pe->primary_pe)
				primary_pe = pe->primary_pe;
			else {
				primary_pe = pe;
				first = 1;
			}

			/* The bio is queued exactly once, on the primary pe. */
			bio_list_add(&primary_pe->origin_bios, bio);

			r = DM_MAPIO_SUBMITTED;
		}

		/* Link this pe to the primary, which gains a reference for it. */
		if (!pe->primary_pe) {
			pe->primary_pe = primary_pe;
			get_pending_exception(primary_pe);
		}

		/* Queue pes that still need their copy started. */
		if (!pe->started) {
			pe->started = 1;
			list_add_tail(&pe->list, &pe_queue);
		}

 next_snapshot:
		up_write(&snap->lock);
	}

	/* No snapshot needed a copy for this write. */
	if (!primary_pe)
		return r;

	/*
	 * If this is the first time we're processing this chunk and
	 * ref_count is now 1 it means all the pending exceptions
	 * got completed while we were in the loop above, so it falls to
	 * us here to remove the primary_pe and submit any origin_bios.
	 */

	if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
		flush_bios(bio_list_get(&primary_pe->origin_bios));
		free_pending_exception(primary_pe);
		/* If we got here, pe_queue is necessarily empty. */
		return r;
	}

	/*
	 * Now that we have a complete pe list we can start the copying.
	 */
	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
		start_copy(pe);

	return r;
}
1222
1223/*
1224 * Called on a write from the origin driver.
1225 */
1226static int do_origin(struct dm_dev *origin, struct bio *bio)
1227{
1228 struct origin *o;
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -08001229 int r = DM_MAPIO_REMAPPED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230
1231 down_read(&_origins_lock);
1232 o = __lookup_origin(origin->bdev);
1233 if (o)
1234 r = __origin_write(&o->snapshots, bio);
1235 up_read(&_origins_lock);
1236
1237 return r;
1238}
1239
1240/*
1241 * Origin: maps a linear range of a device, with hooks for snapshotting.
1242 */
1243
1244/*
1245 * Construct an origin mapping: <dev_path>
1246 * The context for an origin is merely a 'struct dm_dev *'
1247 * pointing to the real device.
1248 */
1249static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1250{
1251 int r;
1252 struct dm_dev *dev;
1253
1254 if (argc != 1) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001255 ti->error = "origin: incorrect number of arguments";
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 return -EINVAL;
1257 }
1258
1259 r = dm_get_device(ti, argv[0], 0, ti->len,
1260 dm_table_get_mode(ti->table), &dev);
1261 if (r) {
1262 ti->error = "Cannot get target device";
1263 return r;
1264 }
1265
1266 ti->private = dev;
1267 return 0;
1268}
1269
1270static void origin_dtr(struct dm_target *ti)
1271{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001272 struct dm_dev *dev = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 dm_put_device(ti, dev);
1274}
1275
1276static int origin_map(struct dm_target *ti, struct bio *bio,
1277 union map_info *map_context)
1278{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001279 struct dm_dev *dev = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 bio->bi_bdev = dev->bdev;
1281
1282 /* Only tell snapshots if this is a write */
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -08001283 return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284}
1285
/*
 * Smaller of @l and @r, except that a zero operand is ignored: if one of
 * them is zero the other is returned (so the result is zero only when
 * both are).
 *
 * Every argument and the whole expansion are parenthesized so the macro
 * can be used inside larger expressions and with arbitrary argument
 * expressions.  The arguments may be evaluated more than once, so they
 * must not have side effects.
 */
#define min_not_zero(l, r) \
	(((l) == 0) ? (r) : (((r) == 0) ? (l) : min((l), (r))))
1287
1288/*
1289 * Set the target "split_io" field to the minimum of all the snapshots'
1290 * chunk sizes.
1291 */
1292static void origin_resume(struct dm_target *ti)
1293{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001294 struct dm_dev *dev = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 struct dm_snapshot *snap;
1296 struct origin *o;
1297 chunk_t chunk_size = 0;
1298
1299 down_read(&_origins_lock);
1300 o = __lookup_origin(dev->bdev);
1301 if (o)
1302 list_for_each_entry (snap, &o->snapshots, list)
1303 chunk_size = min_not_zero(chunk_size, snap->chunk_size);
1304 up_read(&_origins_lock);
1305
1306 ti->split_io = chunk_size;
1307}
1308
1309static int origin_status(struct dm_target *ti, status_type_t type, char *result,
1310 unsigned int maxlen)
1311{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001312 struct dm_dev *dev = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313
1314 switch (type) {
1315 case STATUSTYPE_INFO:
1316 result[0] = '\0';
1317 break;
1318
1319 case STATUSTYPE_TABLE:
1320 snprintf(result, maxlen, "%s", dev->name);
1321 break;
1322 }
1323
1324 return 0;
1325}
1326
1327static struct target_type origin_target = {
1328 .name = "snapshot-origin",
Milan Brozd74f81f2008-02-08 02:11:27 +00001329 .version = {1, 6, 0},
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330 .module = THIS_MODULE,
1331 .ctr = origin_ctr,
1332 .dtr = origin_dtr,
1333 .map = origin_map,
1334 .resume = origin_resume,
1335 .status = origin_status,
1336};
1337
1338static struct target_type snapshot_target = {
1339 .name = "snapshot",
Milan Brozd74f81f2008-02-08 02:11:27 +00001340 .version = {1, 6, 0},
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 .module = THIS_MODULE,
1342 .ctr = snapshot_ctr,
1343 .dtr = snapshot_dtr,
1344 .map = snapshot_map,
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001345 .end_io = snapshot_end_io,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 .resume = snapshot_resume,
1347 .status = snapshot_status,
1348};
1349
1350static int __init dm_snapshot_init(void)
1351{
1352 int r;
1353
1354 r = dm_register_target(&snapshot_target);
1355 if (r) {
1356 DMERR("snapshot target register failed %d", r);
1357 return r;
1358 }
1359
1360 r = dm_register_target(&origin_target);
1361 if (r < 0) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001362 DMERR("Origin target register failed %d", r);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363 goto bad1;
1364 }
1365
1366 r = init_origin_hash();
1367 if (r) {
1368 DMERR("init_origin_hash failed.");
1369 goto bad2;
1370 }
1371
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001372 exception_cache = KMEM_CACHE(dm_snap_exception, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 if (!exception_cache) {
1374 DMERR("Couldn't create exception cache.");
1375 r = -ENOMEM;
1376 goto bad3;
1377 }
1378
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001379 pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 if (!pending_cache) {
1381 DMERR("Couldn't create pending cache.");
1382 r = -ENOMEM;
1383 goto bad4;
1384 }
1385
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001386 tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
1387 if (!tracked_chunk_cache) {
1388 DMERR("Couldn't create cache to track chunks in use.");
1389 r = -ENOMEM;
1390 goto bad5;
1391 }
1392
Matthew Dobson93d23412006-03-26 01:37:50 -08001393 pending_pool = mempool_create_slab_pool(128, pending_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 if (!pending_pool) {
1395 DMERR("Couldn't create pending pool.");
1396 r = -ENOMEM;
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001397 goto bad_pending_pool;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 }
1399
Alasdair G Kergonca3a9312006-10-03 01:15:30 -07001400 ksnapd = create_singlethread_workqueue("ksnapd");
1401 if (!ksnapd) {
1402 DMERR("Failed to create ksnapd workqueue.");
1403 r = -ENOMEM;
1404 goto bad6;
1405 }
1406
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 return 0;
1408
Alasdair G Kergonca3a9312006-10-03 01:15:30 -07001409 bad6:
1410 mempool_destroy(pending_pool);
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001411 bad_pending_pool:
1412 kmem_cache_destroy(tracked_chunk_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 bad5:
1414 kmem_cache_destroy(pending_cache);
1415 bad4:
1416 kmem_cache_destroy(exception_cache);
1417 bad3:
1418 exit_origin_hash();
1419 bad2:
1420 dm_unregister_target(&origin_target);
1421 bad1:
1422 dm_unregister_target(&snapshot_target);
1423 return r;
1424}
1425
1426static void __exit dm_snapshot_exit(void)
1427{
1428 int r;
1429
Alasdair G Kergonca3a9312006-10-03 01:15:30 -07001430 destroy_workqueue(ksnapd);
1431
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 r = dm_unregister_target(&snapshot_target);
1433 if (r)
1434 DMERR("snapshot unregister failed %d", r);
1435
1436 r = dm_unregister_target(&origin_target);
1437 if (r)
1438 DMERR("origin unregister failed %d", r);
1439
1440 exit_origin_hash();
1441 mempool_destroy(pending_pool);
1442 kmem_cache_destroy(pending_cache);
1443 kmem_cache_destroy(exception_cache);
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001444 kmem_cache_destroy(tracked_chunk_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445}
1446
1447/* Module hooks */
1448module_init(dm_snapshot_init);
1449module_exit(dm_snapshot_exit);
1450
1451MODULE_DESCRIPTION(DM_NAME " snapshot target");
1452MODULE_AUTHOR("Joe Thornber");
1453MODULE_LICENSE("GPL");