blob: dc500a6f6232cd8c1c2701549232f5965d184326 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * dm-snapshot.c
3 *
4 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
5 *
6 * This file is released under the GPL.
7 */
8
9#include <linux/blkdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070010#include <linux/device-mapper.h>
Mikulas Patocka90fa1522009-01-06 03:04:54 +000011#include <linux/delay.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070012#include <linux/fs.h>
13#include <linux/init.h>
14#include <linux/kdev_t.h>
15#include <linux/list.h>
16#include <linux/mempool.h>
17#include <linux/module.h>
18#include <linux/slab.h>
19#include <linux/vmalloc.h>
vignesh babu6f3c3f02007-10-19 22:38:44 +010020#include <linux/log2.h>
Alasdair G Kergona765e202008-04-24 22:02:01 +010021#include <linux/dm-kcopyd.h>
Jonathan Brassowccc45ea2009-04-02 19:55:34 +010022#include <linux/workqueue.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023
Jonathan Brassowaea53d92009-01-06 03:05:15 +000024#include "dm-exception-store.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070025
#define DM_MSG_PREFIX "snapshots"

/*
 * Users are woken up each time this percentage increment is crossed.
 */
#define WAKE_UP_PERCENT 5

/*
 * Priority at which snapshot copy jobs are handed to kcopyd.
 */
#define SNAPSHOT_COPY_PRIORITY 2

/*
 * Initial reservation per snapshot: 1MB worth of pages, but never
 * fewer than one page (for page sizes above 1MB the shift yields 0).
 */
#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1)

/*
 * Minimum number of elements kept in the per-snapshot mempools
 * (pending exceptions and tracked chunks).
 */
#define MIN_IOS 256

/*
 * Tracked chunks live in a small fixed-size hash; the bucket is picked
 * from the low bits of the chunk number, so the size must stay a
 * power of two.
 */
#define DM_TRACKED_CHUNK_HASH_SIZE 16
#define DM_TRACKED_CHUNK_HASH(x) \
	((unsigned long)(x) & (DM_TRACKED_CHUNK_HASH_SIZE - 1))
51
/*
 * Hash table of exceptions.
 *
 * hash_mask:  table size minus one (set by dm_exception_table_init()).
 * hash_shift: number of low chunk-number bits dropped before hashing.
 * table:      array of hash_mask + 1 list heads, one per bucket.
 */
struct dm_exception_table {
	uint32_t hash_mask;
	unsigned int hash_shift;
	struct list_head *table;
};
57
58struct dm_snapshot {
59 struct rw_semaphore lock;
60
61 struct dm_dev *origin;
Mike Snitzerfc56f6f2009-12-10 23:52:12 +000062 struct dm_dev *cow;
63
64 struct dm_target *ti;
Jonathan Brassowccc45ea2009-04-02 19:55:34 +010065
66 /* List of snapshots per Origin */
67 struct list_head list;
68
69 /* You can't use a snapshot if this is 0 (e.g. if full) */
70 int valid;
71
72 /* Origin writes don't trigger exceptions until this is set */
73 int active;
74
Jonathan Brassowccc45ea2009-04-02 19:55:34 +010075 mempool_t *pending_pool;
76
77 atomic_t pending_exceptions_count;
78
Jon Brassow191437a2009-12-10 23:52:10 +000079 struct dm_exception_table pending;
80 struct dm_exception_table complete;
Jonathan Brassowccc45ea2009-04-02 19:55:34 +010081
82 /*
83 * pe_lock protects all pending_exception operations and access
84 * as well as the snapshot_bios list.
85 */
86 spinlock_t pe_lock;
87
88 /* The on disk metadata handler */
89 struct dm_exception_store *store;
90
91 struct dm_kcopyd_client *kcopyd_client;
92
93 /* Queue of snapshot writes for ksnapd to flush */
94 struct bio_list queued_bios;
95 struct work_struct queued_bios_work;
96
97 /* Chunks with outstanding reads */
98 mempool_t *tracked_chunk_pool;
99 spinlock_t tracked_chunk_lock;
100 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
101};
102
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000103struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
104{
105 return s->cow;
106}
107EXPORT_SYMBOL(dm_snap_cow);
108
/* Workqueue that runs each snapshot's queued_bios_work. */
static struct workqueue_struct *ksnapd;

/* Work handler for queued_bios_work; defined further down. */
static void flush_queued_bios(struct work_struct *work);
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700111
Jonathan Brassowccc45ea2009-04-02 19:55:34 +0100112static sector_t chunk_to_sector(struct dm_exception_store *store,
113 chunk_t chunk)
114{
115 return chunk << store->chunk_shift;
116}
117
/*
 * Return 1 when both arguments refer to the same block device, else 0.
 *
 * A particular block device only ever has one instance, so comparing
 * the pointers is sufficient.
 */
static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	if (lhs != rhs)
		return 0;

	return 1;
}
126
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100127struct dm_snap_pending_exception {
Jon Brassow1d4989c2009-12-10 23:52:10 +0000128 struct dm_exception e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
130 /*
131 * Origin buffers waiting for this to complete are held
132 * in a bio list
133 */
134 struct bio_list origin_bios;
135 struct bio_list snapshot_bios;
136
137 /*
Alasdair G Kergoneccf0812006-03-27 01:17:42 -0800138 * Short-term queue of pending exceptions prior to submission.
139 */
140 struct list_head list;
141
142 /*
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -0800143 * The primary pending_exception is the one that holds
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700144 * the ref_count and the list of origin_bios for a
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -0800145 * group of pending_exceptions. It is always last to get freed.
146 * These fields get set up when writing to the origin.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 */
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100148 struct dm_snap_pending_exception *primary_pe;
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -0800149
150 /*
151 * Number of pending_exceptions processing this chunk.
152 * When this drops to zero we must complete the origin bios.
153 * If incrementing or decrementing this, hold pe->snap->lock for
154 * the sibling concerned and not pe->primary_pe->snap->lock unless
155 * they are the same.
156 */
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700157 atomic_t ref_count;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158
159 /* Pointer back to snapshot context */
160 struct dm_snapshot *snap;
161
162 /*
163 * 1 indicates the exception has already been sent to
164 * kcopyd.
165 */
166 int started;
167};
168
/*
 * Slab caches shared by all snapshots: exception_cache supplies
 * completed exceptions (see alloc_completed_exception()), and
 * pending_cache backs each snapshot's pending_pool mempool.
 */
static struct kmem_cache *exception_cache;
static struct kmem_cache *pending_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100176struct dm_snap_tracked_chunk {
177 struct hlist_node node;
178 chunk_t chunk;
179};
180
/* Slab cache backing each snapshot's tracked_chunk_pool mempool. */
static struct kmem_cache *tracked_chunk_cache;
182
183static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s,
184 chunk_t chunk)
185{
186 struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool,
187 GFP_NOIO);
188 unsigned long flags;
189
190 c->chunk = chunk;
191
192 spin_lock_irqsave(&s->tracked_chunk_lock, flags);
193 hlist_add_head(&c->node,
194 &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
195 spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
196
197 return c;
198}
199
200static void stop_tracking_chunk(struct dm_snapshot *s,
201 struct dm_snap_tracked_chunk *c)
202{
203 unsigned long flags;
204
205 spin_lock_irqsave(&s->tracked_chunk_lock, flags);
206 hlist_del(&c->node);
207 spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
208
209 mempool_free(c, s->tracked_chunk_pool);
210}
211
Mikulas Patockaa8d41b52008-07-21 12:00:34 +0100212static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
213{
214 struct dm_snap_tracked_chunk *c;
215 struct hlist_node *hn;
216 int found = 0;
217
218 spin_lock_irq(&s->tracked_chunk_lock);
219
220 hlist_for_each_entry(c, hn,
221 &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
222 if (c->chunk == chunk) {
223 found = 1;
224 break;
225 }
226 }
227
228 spin_unlock_irq(&s->tracked_chunk_lock);
229
230 return found;
231}
232
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233/*
234 * One of these per registered origin, held in the snapshot_origins hash
235 */
236struct origin {
237 /* The origin device */
238 struct block_device *bdev;
239
240 struct list_head hash_list;
241
242 /* List of snapshots for this origin */
243 struct list_head snapshots;
244};
245
246/*
247 * Size of the hash table for origin volumes. If we make this
248 * the size of the minors list then it should be nearly perfect
249 */
250#define ORIGIN_HASH_SIZE 256
251#define ORIGIN_MASK 0xFF
252static struct list_head *_origins;
253static struct rw_semaphore _origins_lock;
254
255static int init_origin_hash(void)
256{
257 int i;
258
259 _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
260 GFP_KERNEL);
261 if (!_origins) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700262 DMERR("unable to allocate memory");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 return -ENOMEM;
264 }
265
266 for (i = 0; i < ORIGIN_HASH_SIZE; i++)
267 INIT_LIST_HEAD(_origins + i);
268 init_rwsem(&_origins_lock);
269
270 return 0;
271}
272
273static void exit_origin_hash(void)
274{
275 kfree(_origins);
276}
277
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100278static unsigned origin_hash(struct block_device *bdev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279{
280 return bdev->bd_dev & ORIGIN_MASK;
281}
282
283static struct origin *__lookup_origin(struct block_device *origin)
284{
285 struct list_head *ol;
286 struct origin *o;
287
288 ol = &_origins[origin_hash(origin)];
289 list_for_each_entry (o, ol, hash_list)
290 if (bdev_equal(o->bdev, origin))
291 return o;
292
293 return NULL;
294}
295
296static void __insert_origin(struct origin *o)
297{
298 struct list_head *sl = &_origins[origin_hash(o->bdev)];
299 list_add_tail(&o->hash_list, sl);
300}
301
302/*
303 * Make a note of the snapshot and its origin so we can look it
304 * up when the origin has a write on it.
305 */
306static int register_snapshot(struct dm_snapshot *snap)
307{
Mikulas Patocka6d45d93e2009-10-16 23:18:14 +0100308 struct dm_snapshot *l;
Mikulas Patocka60c856c82008-10-30 13:33:12 +0000309 struct origin *o, *new_o;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310 struct block_device *bdev = snap->origin->bdev;
311
Mikulas Patocka60c856c82008-10-30 13:33:12 +0000312 new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
313 if (!new_o)
314 return -ENOMEM;
315
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 down_write(&_origins_lock);
317 o = __lookup_origin(bdev);
318
Mikulas Patocka60c856c82008-10-30 13:33:12 +0000319 if (o)
320 kfree(new_o);
321 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 /* New origin */
Mikulas Patocka60c856c82008-10-30 13:33:12 +0000323 o = new_o;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324
325 /* Initialise the struct */
326 INIT_LIST_HEAD(&o->snapshots);
327 o->bdev = bdev;
328
329 __insert_origin(o);
330 }
331
Mikulas Patocka6d45d93e2009-10-16 23:18:14 +0100332 /* Sort the list according to chunk size, largest-first smallest-last */
333 list_for_each_entry(l, &o->snapshots, list)
334 if (l->store->chunk_size < snap->store->chunk_size)
335 break;
336 list_add_tail(&snap->list, &l->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337
338 up_write(&_origins_lock);
339 return 0;
340}
341
342static void unregister_snapshot(struct dm_snapshot *s)
343{
344 struct origin *o;
345
346 down_write(&_origins_lock);
347 o = __lookup_origin(s->origin->bdev);
348
349 list_del(&s->list);
350 if (list_empty(&o->snapshots)) {
351 list_del(&o->hash_list);
352 kfree(o);
353 }
354
355 up_write(&_origins_lock);
356}
357
358/*
359 * Implementation of the exception hash tables.
Milan Brozd74f81f2008-02-08 02:11:27 +0000360 * The lowest hash_shift bits of the chunk number are ignored, allowing
361 * some consecutive chunks to be grouped together.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 */
Jon Brassow3510cb92009-12-10 23:52:11 +0000363static int dm_exception_table_init(struct dm_exception_table *et,
364 uint32_t size, unsigned hash_shift)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365{
366 unsigned int i;
367
Milan Brozd74f81f2008-02-08 02:11:27 +0000368 et->hash_shift = hash_shift;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 et->hash_mask = size - 1;
370 et->table = dm_vcalloc(size, sizeof(struct list_head));
371 if (!et->table)
372 return -ENOMEM;
373
374 for (i = 0; i < size; i++)
375 INIT_LIST_HEAD(et->table + i);
376
377 return 0;
378}
379
Jon Brassow3510cb92009-12-10 23:52:11 +0000380static void dm_exception_table_exit(struct dm_exception_table *et,
381 struct kmem_cache *mem)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382{
383 struct list_head *slot;
Jon Brassow1d4989c2009-12-10 23:52:10 +0000384 struct dm_exception *ex, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 int i, size;
386
387 size = et->hash_mask + 1;
388 for (i = 0; i < size; i++) {
389 slot = et->table + i;
390
391 list_for_each_entry_safe (ex, next, slot, hash_list)
392 kmem_cache_free(mem, ex);
393 }
394
395 vfree(et->table);
396}
397
Jon Brassow191437a2009-12-10 23:52:10 +0000398static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399{
Milan Brozd74f81f2008-02-08 02:11:27 +0000400 return (chunk >> et->hash_shift) & et->hash_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401}
402
Jon Brassow3510cb92009-12-10 23:52:11 +0000403static void dm_remove_exception(struct dm_exception *e)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404{
405 list_del(&e->hash_list);
406}
407
408/*
409 * Return the exception data for a sector, or NULL if not
410 * remapped.
411 */
Jon Brassow3510cb92009-12-10 23:52:11 +0000412static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
413 chunk_t chunk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414{
415 struct list_head *slot;
Jon Brassow1d4989c2009-12-10 23:52:10 +0000416 struct dm_exception *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
418 slot = &et->table[exception_hash(et, chunk)];
419 list_for_each_entry (e, slot, hash_list)
Milan Brozd74f81f2008-02-08 02:11:27 +0000420 if (chunk >= e->old_chunk &&
421 chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 return e;
423
424 return NULL;
425}
426
Jon Brassow3510cb92009-12-10 23:52:11 +0000427static struct dm_exception *alloc_completed_exception(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428{
Jon Brassow1d4989c2009-12-10 23:52:10 +0000429 struct dm_exception *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
431 e = kmem_cache_alloc(exception_cache, GFP_NOIO);
432 if (!e)
433 e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
434
435 return e;
436}
437
Jon Brassow3510cb92009-12-10 23:52:11 +0000438static void free_completed_exception(struct dm_exception *e)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439{
440 kmem_cache_free(exception_cache, e);
441}
442
Mikulas Patocka92e86812008-07-21 12:00:35 +0100443static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444{
Mikulas Patocka92e86812008-07-21 12:00:35 +0100445 struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool,
446 GFP_NOIO);
447
Mikulas Patocka879129d22008-10-30 13:33:16 +0000448 atomic_inc(&s->pending_exceptions_count);
Mikulas Patocka92e86812008-07-21 12:00:35 +0100449 pe->snap = s;
450
451 return pe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452}
453
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100454static void free_pending_exception(struct dm_snap_pending_exception *pe)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455{
Mikulas Patocka879129d22008-10-30 13:33:16 +0000456 struct dm_snapshot *s = pe->snap;
457
458 mempool_free(pe, s->pending_pool);
459 smp_mb__before_atomic_dec();
460 atomic_dec(&s->pending_exceptions_count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461}
462
Jon Brassow3510cb92009-12-10 23:52:11 +0000463static void dm_insert_exception(struct dm_exception_table *eh,
464 struct dm_exception *new_e)
Milan Brozd74f81f2008-02-08 02:11:27 +0000465{
Milan Brozd74f81f2008-02-08 02:11:27 +0000466 struct list_head *l;
Jon Brassow1d4989c2009-12-10 23:52:10 +0000467 struct dm_exception *e = NULL;
Milan Brozd74f81f2008-02-08 02:11:27 +0000468
469 l = &eh->table[exception_hash(eh, new_e->old_chunk)];
470
471 /* Add immediately if this table doesn't support consecutive chunks */
472 if (!eh->hash_shift)
473 goto out;
474
475 /* List is ordered by old_chunk */
476 list_for_each_entry_reverse(e, l, hash_list) {
477 /* Insert after an existing chunk? */
478 if (new_e->old_chunk == (e->old_chunk +
479 dm_consecutive_chunk_count(e) + 1) &&
480 new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
481 dm_consecutive_chunk_count(e) + 1)) {
482 dm_consecutive_chunk_count_inc(e);
Jon Brassow3510cb92009-12-10 23:52:11 +0000483 free_completed_exception(new_e);
Milan Brozd74f81f2008-02-08 02:11:27 +0000484 return;
485 }
486
487 /* Insert before an existing chunk? */
488 if (new_e->old_chunk == (e->old_chunk - 1) &&
489 new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
490 dm_consecutive_chunk_count_inc(e);
491 e->old_chunk--;
492 e->new_chunk--;
Jon Brassow3510cb92009-12-10 23:52:11 +0000493 free_completed_exception(new_e);
Milan Brozd74f81f2008-02-08 02:11:27 +0000494 return;
495 }
496
497 if (new_e->old_chunk > e->old_chunk)
498 break;
499 }
500
501out:
502 list_add(&new_e->hash_list, e ? &e->hash_list : l);
503}
504
Jonathan Brassowa159c1a2009-01-06 03:05:19 +0000505/*
506 * Callback used by the exception stores to load exceptions when
507 * initialising.
508 */
509static int dm_add_exception(void *context, chunk_t old, chunk_t new)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510{
Jonathan Brassowa159c1a2009-01-06 03:05:19 +0000511 struct dm_snapshot *s = context;
Jon Brassow1d4989c2009-12-10 23:52:10 +0000512 struct dm_exception *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513
Jon Brassow3510cb92009-12-10 23:52:11 +0000514 e = alloc_completed_exception();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 if (!e)
516 return -ENOMEM;
517
518 e->old_chunk = old;
Milan Brozd74f81f2008-02-08 02:11:27 +0000519
520 /* Consecutive_count is implicitly initialised to zero */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521 e->new_chunk = new;
Milan Brozd74f81f2008-02-08 02:11:27 +0000522
Jon Brassow3510cb92009-12-10 23:52:11 +0000523 dm_insert_exception(&s->complete, e);
Milan Brozd74f81f2008-02-08 02:11:27 +0000524
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525 return 0;
526}
527
/*
 * Smaller of two values, where zero means "not set": if one argument
 * is zero the other one is the result.  Arguments may be evaluated
 * more than once, so they must be free of side effects.
 */
#define min_not_zero(l, r) \
	(((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r)))
529
530/*
531 * Return a minimum chunk size of all snapshots that have the specified origin.
532 * Return zero if the origin has no snapshots.
533 */
534static sector_t __minimum_chunk_size(struct origin *o)
535{
536 struct dm_snapshot *snap;
537 unsigned chunk_size = 0;
538
539 if (o)
540 list_for_each_entry(snap, &o->snapshots, list)
541 chunk_size = min_not_zero(chunk_size,
542 snap->store->chunk_size);
543
544 return chunk_size;
545}
546
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547/*
548 * Hard coded magic.
549 */
550static int calc_max_buckets(void)
551{
552 /* use a fixed size of 2MB */
553 unsigned long mem = 2 * 1024 * 1024;
554 mem /= sizeof(struct list_head);
555
556 return mem;
557}
558
559/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560 * Allocate room for a suitable hash table.
561 */
Jonathan Brassowfee19982009-04-02 19:55:34 +0100562static int init_hash_tables(struct dm_snapshot *s)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563{
564 sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
565
566 /*
567 * Calculate based on the size of the original volume or
568 * the COW volume...
569 */
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000570 cow_dev_size = get_dev_size(s->cow->bdev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571 origin_dev_size = get_dev_size(s->origin->bdev);
572 max_buckets = calc_max_buckets();
573
Jonathan Brassowfee19982009-04-02 19:55:34 +0100574 hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 hash_size = min(hash_size, max_buckets);
576
Mikulas Patocka8e87b9b2009-12-10 23:51:54 +0000577 if (hash_size < 64)
578 hash_size = 64;
Robert P. J. Day8defd832008-02-08 02:10:06 +0000579 hash_size = rounddown_pow_of_two(hash_size);
Jon Brassow3510cb92009-12-10 23:52:11 +0000580 if (dm_exception_table_init(&s->complete, hash_size,
581 DM_CHUNK_CONSECUTIVE_BITS))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 return -ENOMEM;
583
584 /*
585 * Allocate hash table for in-flight exceptions
586 * Make this smaller than the real hash table
587 */
588 hash_size >>= 3;
589 if (hash_size < 64)
590 hash_size = 64;
591
Jon Brassow3510cb92009-12-10 23:52:11 +0000592 if (dm_exception_table_init(&s->pending, hash_size, 0)) {
593 dm_exception_table_exit(&s->complete, exception_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 return -ENOMEM;
595 }
596
597 return 0;
598}
599
600/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
602 */
603static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
604{
605 struct dm_snapshot *s;
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100606 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607 int r = -EINVAL;
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000608 char *origin_path, *cow_path;
Jonathan Brassowfee19982009-04-02 19:55:34 +0100609 unsigned args_used;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
Mark McLoughlin4c7e3bf2006-10-03 01:15:25 -0700611 if (argc != 4) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700612 ti->error = "requires exactly 4 arguments";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 r = -EINVAL;
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000614 goto bad;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 }
616
617 origin_path = argv[0];
Jonathan Brassowfee19982009-04-02 19:55:34 +0100618 argv++;
619 argc--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 s = kmalloc(sizeof(*s), GFP_KERNEL);
Jonathan Brassowfee19982009-04-02 19:55:34 +0100622 if (!s) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 ti->error = "Cannot allocate snapshot context private "
624 "structure";
625 r = -ENOMEM;
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000626 goto bad;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 }
628
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000629 cow_path = argv[0];
630 argv++;
631 argc--;
632
633 r = dm_get_device(ti, cow_path, 0, 0,
634 FMODE_READ | FMODE_WRITE, &s->cow);
635 if (r) {
636 ti->error = "Cannot get COW device";
637 goto bad_cow;
638 }
639
640 r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
641 if (r) {
642 ti->error = "Couldn't create exception store";
643 r = -EINVAL;
644 goto bad_store;
645 }
646
647 argv += args_used;
648 argc -= args_used;
649
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650 r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
651 if (r) {
652 ti->error = "Cannot get origin device";
Jonathan Brassowfee19982009-04-02 19:55:34 +0100653 goto bad_origin;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 }
655
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000656 s->ti = ti;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 s->valid = 1;
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -0800658 s->active = 0;
Mikulas Patocka879129d22008-10-30 13:33:16 +0000659 atomic_set(&s->pending_exceptions_count, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 init_rwsem(&s->lock);
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700661 spin_lock_init(&s->pe_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662
663 /* Allocate hash table for COW data */
Jonathan Brassowfee19982009-04-02 19:55:34 +0100664 if (init_hash_tables(s)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 ti->error = "Unable to allocate hash table space";
666 r = -ENOMEM;
Jonathan Brassowfee19982009-04-02 19:55:34 +0100667 goto bad_hash_tables;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 }
669
Heinz Mauelshageneb69aca2008-04-24 21:43:19 +0100670 r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 if (r) {
672 ti->error = "Could not create kcopyd client";
Jonathan Brassowfee19982009-04-02 19:55:34 +0100673 goto bad_kcopyd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 }
675
Mikulas Patocka92e86812008-07-21 12:00:35 +0100676 s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
677 if (!s->pending_pool) {
678 ti->error = "Could not allocate mempool for pending exceptions";
Jonathan Brassowfee19982009-04-02 19:55:34 +0100679 goto bad_pending_pool;
Mikulas Patocka92e86812008-07-21 12:00:35 +0100680 }
681
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100682 s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
683 tracked_chunk_cache);
684 if (!s->tracked_chunk_pool) {
685 ti->error = "Could not allocate tracked_chunk mempool for "
686 "tracking reads";
Mikulas Patocka92e86812008-07-21 12:00:35 +0100687 goto bad_tracked_chunk_pool;
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100688 }
689
690 for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
691 INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
692
693 spin_lock_init(&s->tracked_chunk_lock);
694
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -0800695 /* Metadata must only be loaded into one table at once */
Jonathan Brassow493df712009-04-02 19:55:31 +0100696 r = s->store->type->read_metadata(s->store, dm_add_exception,
697 (void *)s);
Milan Broz07641472007-07-12 17:28:13 +0100698 if (r < 0) {
Mark McLoughlinf9cea4f2006-10-03 01:15:25 -0700699 ti->error = "Failed to read snapshot metadata";
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100700 goto bad_load_and_register;
Milan Broz07641472007-07-12 17:28:13 +0100701 } else if (r > 0) {
702 s->valid = 0;
703 DMWARN("Snapshot is marked invalid.");
Mark McLoughlinf9cea4f2006-10-03 01:15:25 -0700704 }
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -0800705
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700706 bio_list_init(&s->queued_bios);
David Howellsc4028952006-11-22 14:57:56 +0000707 INIT_WORK(&s->queued_bios_work, flush_queued_bios);
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700708
Mikulas Patocka3f2412d2009-10-16 23:18:16 +0100709 if (!s->store->chunk_size) {
710 ti->error = "Chunk size not set";
711 goto bad_load_and_register;
712 }
713
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 /* Add snapshot to the list of snapshots for this origin */
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -0800715 /* Exceptions aren't triggered till snapshot_resume() is called */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 if (register_snapshot(s)) {
717 r = -EINVAL;
718 ti->error = "Cannot register snapshot origin";
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100719 goto bad_load_and_register;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 }
721
722 ti->private = s;
Jonathan Brassowd0216842009-04-02 19:55:32 +0100723 ti->split_io = s->store->chunk_size;
Mikulas Patocka494b3ee2009-06-22 10:12:25 +0100724 ti->num_flush_requests = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
726 return 0;
727
Jonathan Brassowfee19982009-04-02 19:55:34 +0100728bad_load_and_register:
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100729 mempool_destroy(s->tracked_chunk_pool);
730
Jonathan Brassowfee19982009-04-02 19:55:34 +0100731bad_tracked_chunk_pool:
Mikulas Patocka92e86812008-07-21 12:00:35 +0100732 mempool_destroy(s->pending_pool);
733
Jonathan Brassowfee19982009-04-02 19:55:34 +0100734bad_pending_pool:
Heinz Mauelshageneb69aca2008-04-24 21:43:19 +0100735 dm_kcopyd_client_destroy(s->kcopyd_client);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736
Jonathan Brassowfee19982009-04-02 19:55:34 +0100737bad_kcopyd:
Jon Brassow3510cb92009-12-10 23:52:11 +0000738 dm_exception_table_exit(&s->pending, pending_cache);
739 dm_exception_table_exit(&s->complete, exception_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740
Jonathan Brassowfee19982009-04-02 19:55:34 +0100741bad_hash_tables:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 dm_put_device(ti, s->origin);
743
Jonathan Brassowfee19982009-04-02 19:55:34 +0100744bad_origin:
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000745 dm_exception_store_destroy(s->store);
746
747bad_store:
748 dm_put_device(ti, s->cow);
749
750bad_cow:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 kfree(s);
752
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000753bad:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 return r;
755}
756
Milan Broz31c93a0c2006-12-08 02:41:11 -0800757static void __free_exceptions(struct dm_snapshot *s)
758{
Heinz Mauelshageneb69aca2008-04-24 21:43:19 +0100759 dm_kcopyd_client_destroy(s->kcopyd_client);
Milan Broz31c93a0c2006-12-08 02:41:11 -0800760 s->kcopyd_client = NULL;
761
Jon Brassow3510cb92009-12-10 23:52:11 +0000762 dm_exception_table_exit(&s->pending, pending_cache);
763 dm_exception_table_exit(&s->complete, exception_cache);
Milan Broz31c93a0c2006-12-08 02:41:11 -0800764}
765
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766static void snapshot_dtr(struct dm_target *ti)
767{
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100768#ifdef CONFIG_DM_DEBUG
769 int i;
770#endif
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100771 struct dm_snapshot *s = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700773 flush_workqueue(ksnapd);
774
Alasdair G Kergon138728dc2006-03-27 01:17:50 -0800775 /* Prevent further origin writes from using this snapshot. */
776 /* After this returns there can be no new kcopyd jobs. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 unregister_snapshot(s);
778
Mikulas Patocka879129d22008-10-30 13:33:16 +0000779 while (atomic_read(&s->pending_exceptions_count))
Mikulas Patocka90fa1522009-01-06 03:04:54 +0000780 msleep(1);
Mikulas Patocka879129d22008-10-30 13:33:16 +0000781 /*
782 * Ensure instructions in mempool_destroy aren't reordered
783 * before atomic_read.
784 */
785 smp_mb();
786
Mikulas Patockacd45daf2008-07-21 12:00:32 +0100787#ifdef CONFIG_DM_DEBUG
788 for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
789 BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
790#endif
791
792 mempool_destroy(s->tracked_chunk_pool);
793
Milan Broz31c93a0c2006-12-08 02:41:11 -0800794 __free_exceptions(s);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795
Mikulas Patocka92e86812008-07-21 12:00:35 +0100796 mempool_destroy(s->pending_pool);
797
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 dm_put_device(ti, s->origin);
Jonathan Brassowfee19982009-04-02 19:55:34 +0100799
800 dm_exception_store_destroy(s->store);
Alasdair G Kergon138728dc2006-03-27 01:17:50 -0800801
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000802 dm_put_device(ti, s->cow);
803
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 kfree(s);
805}
806
807/*
808 * Flush a list of buffers.
809 */
810static void flush_bios(struct bio *bio)
811{
812 struct bio *n;
813
814 while (bio) {
815 n = bio->bi_next;
816 bio->bi_next = NULL;
817 generic_make_request(bio);
818 bio = n;
819 }
820}
821
David Howellsc4028952006-11-22 14:57:56 +0000822static void flush_queued_bios(struct work_struct *work)
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700823{
David Howellsc4028952006-11-22 14:57:56 +0000824 struct dm_snapshot *s =
825 container_of(work, struct dm_snapshot, queued_bios_work);
Alasdair G Kergonca3a9312006-10-03 01:15:30 -0700826 struct bio *queued_bios;
827 unsigned long flags;
828
829 spin_lock_irqsave(&s->pe_lock, flags);
830 queued_bios = bio_list_get(&s->queued_bios);
831 spin_unlock_irqrestore(&s->pe_lock, flags);
832
833 flush_bios(queued_bios);
834}
835
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836/*
837 * Error a list of buffers.
838 */
839static void error_bios(struct bio *bio)
840{
841 struct bio *n;
842
843 while (bio) {
844 n = bio->bi_next;
845 bio->bi_next = NULL;
NeilBrown6712ecf2007-09-27 12:47:43 +0200846 bio_io_error(bio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 bio = n;
848 }
849}
850
Alasdair G Kergon695368a2006-10-03 01:15:31 -0700851static void __invalidate_snapshot(struct dm_snapshot *s, int err)
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800852{
853 if (!s->valid)
854 return;
855
856 if (err == -EIO)
857 DMERR("Invalidating snapshot: Error reading/writing.");
858 else if (err == -ENOMEM)
859 DMERR("Invalidating snapshot: Unable to allocate exception.");
860
Jonathan Brassow493df712009-04-02 19:55:31 +0100861 if (s->store->type->drop_snapshot)
862 s->store->type->drop_snapshot(s->store);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800863
864 s->valid = 0;
865
Mike Snitzerfc56f6f2009-12-10 23:52:12 +0000866 dm_table_event(s->ti->table);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800867}
868
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100869static void get_pending_exception(struct dm_snap_pending_exception *pe)
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700870{
871 atomic_inc(&pe->ref_count);
872}
873
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100874static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700875{
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100876 struct dm_snap_pending_exception *primary_pe;
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700877 struct bio *origin_bios = NULL;
878
879 primary_pe = pe->primary_pe;
880
881 /*
882 * If this pe is involved in a write to the origin and
883 * it is the last sibling to complete then release
884 * the bios for the original write to the origin.
885 */
886 if (primary_pe &&
Mikulas Patocka7c5f78b2008-10-21 17:44:51 +0100887 atomic_dec_and_test(&primary_pe->ref_count)) {
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700888 origin_bios = bio_list_get(&primary_pe->origin_bios);
Mikulas Patocka7c5f78b2008-10-21 17:44:51 +0100889 free_pending_exception(primary_pe);
890 }
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700891
892 /*
893 * Free the pe if it's not linked to an origin write or if
894 * it's not itself a primary pe.
895 */
896 if (!primary_pe || primary_pe != pe)
897 free_pending_exception(pe);
898
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700899 return origin_bios;
900}
901
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100902static void pending_complete(struct dm_snap_pending_exception *pe, int success)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903{
Jon Brassow1d4989c2009-12-10 23:52:10 +0000904 struct dm_exception *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 struct dm_snapshot *s = pe->snap;
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700906 struct bio *origin_bios = NULL;
907 struct bio *snapshot_bios = NULL;
908 int error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800910 if (!success) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 /* Read/write error - snapshot is unusable */
912 down_write(&s->lock);
Alasdair G Kergon695368a2006-10-03 01:15:31 -0700913 __invalidate_snapshot(s, -EIO);
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700914 error = 1;
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800915 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 }
917
Jon Brassow3510cb92009-12-10 23:52:11 +0000918 e = alloc_completed_exception();
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800919 if (!e) {
920 down_write(&s->lock);
Alasdair G Kergon695368a2006-10-03 01:15:31 -0700921 __invalidate_snapshot(s, -ENOMEM);
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700922 error = 1;
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800923 goto out;
924 }
925 *e = pe->e;
926
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700927 down_write(&s->lock);
928 if (!s->valid) {
Jon Brassow3510cb92009-12-10 23:52:11 +0000929 free_completed_exception(e);
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700930 error = 1;
931 goto out;
932 }
933
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800934 /*
Mikulas Patockaa8d41b52008-07-21 12:00:34 +0100935 * Check for conflicting reads. This is extremely improbable,
Mikulas Patocka90fa1522009-01-06 03:04:54 +0000936 * so msleep(1) is sufficient and there is no need for a wait queue.
Mikulas Patockaa8d41b52008-07-21 12:00:34 +0100937 */
938 while (__chunk_is_tracked(s, pe->e.old_chunk))
Mikulas Patocka90fa1522009-01-06 03:04:54 +0000939 msleep(1);
Mikulas Patockaa8d41b52008-07-21 12:00:34 +0100940
941 /*
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800942 * Add a proper exception, and remove the
943 * in-flight exception from the list.
944 */
Jon Brassow3510cb92009-12-10 23:52:11 +0000945 dm_insert_exception(&s->complete, e);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -0800946
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 out:
Jon Brassow3510cb92009-12-10 23:52:11 +0000948 dm_remove_exception(&pe->e);
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700949 snapshot_bios = bio_list_get(&pe->snapshot_bios);
Alasdair G Kergon4b832e82006-10-03 01:15:30 -0700950 origin_bios = put_pending_exception(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951
Alasdair G Kergon9d493fa2006-10-03 01:15:29 -0700952 up_write(&s->lock);
953
954 /* Submit any pending write bios */
955 if (error)
956 error_bios(snapshot_bios);
957 else
958 flush_bios(snapshot_bios);
959
960 flush_bios(origin_bios);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961}
962
/*
 * Callback handed to the exception store's commit_exception hook;
 * @context is the pending exception.
 */
static void commit_callback(void *context, int success)
{
	pending_complete(context, success);
}
969
970/*
971 * Called when the copy I/O has finished. kcopyd actually runs
972 * this code so don't block.
973 */
Alasdair G Kergon4cdc1d12008-03-28 14:16:10 -0700974static void copy_callback(int read_err, unsigned long write_err, void *context)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975{
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100976 struct dm_snap_pending_exception *pe = context;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 struct dm_snapshot *s = pe->snap;
978
979 if (read_err || write_err)
980 pending_complete(pe, 0);
981
982 else
983 /* Update the metadata if we are persistent */
Jonathan Brassow493df712009-04-02 19:55:31 +0100984 s->store->type->commit_exception(s->store, &pe->e,
985 commit_callback, pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986}
987
988/*
989 * Dispatches the copy operation to kcopyd.
990 */
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100991static void start_copy(struct dm_snap_pending_exception *pe)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992{
993 struct dm_snapshot *s = pe->snap;
Heinz Mauelshagen22a1ceb2008-04-24 21:43:17 +0100994 struct dm_io_region src, dest;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 struct block_device *bdev = s->origin->bdev;
996 sector_t dev_size;
997
998 dev_size = get_dev_size(bdev);
999
1000 src.bdev = bdev;
Jonathan Brassow71fab002009-04-02 19:55:33 +01001001 src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
Mikulas Patockadf96eee2009-10-16 23:18:17 +01001002 src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003
Mike Snitzerfc56f6f2009-12-10 23:52:12 +00001004 dest.bdev = s->cow->bdev;
Jonathan Brassow71fab002009-04-02 19:55:33 +01001005 dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 dest.count = src.count;
1007
1008 /* Hand over to kcopyd */
Heinz Mauelshageneb69aca2008-04-24 21:43:19 +01001009 dm_kcopyd_copy(s->kcopyd_client,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 &src, 1, &dest, 0, copy_callback, pe);
1011}
1012
Mikulas Patocka29138082009-04-02 19:55:25 +01001013static struct dm_snap_pending_exception *
1014__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
1015{
Jon Brassow3510cb92009-12-10 23:52:11 +00001016 struct dm_exception *e = dm_lookup_exception(&s->pending, chunk);
Mikulas Patocka29138082009-04-02 19:55:25 +01001017
1018 if (!e)
1019 return NULL;
1020
1021 return container_of(e, struct dm_snap_pending_exception, e);
1022}
1023
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024/*
1025 * Looks to see if this snapshot already has a pending exception
1026 * for this chunk, otherwise it allocates a new one and inserts
1027 * it into the pending table.
1028 *
1029 * NOTE: a write lock must be held on snap->lock before calling
1030 * this.
1031 */
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001032static struct dm_snap_pending_exception *
Mikulas Patockac6621392009-04-02 19:55:25 +01001033__find_pending_exception(struct dm_snapshot *s,
1034 struct dm_snap_pending_exception *pe, chunk_t chunk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035{
Mikulas Patockac6621392009-04-02 19:55:25 +01001036 struct dm_snap_pending_exception *pe2;
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001037
Mikulas Patocka29138082009-04-02 19:55:25 +01001038 pe2 = __lookup_pending_exception(s, chunk);
1039 if (pe2) {
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001040 free_pending_exception(pe);
Mikulas Patocka29138082009-04-02 19:55:25 +01001041 return pe2;
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001042 }
1043
1044 pe->e.old_chunk = chunk;
1045 bio_list_init(&pe->origin_bios);
1046 bio_list_init(&pe->snapshot_bios);
1047 pe->primary_pe = NULL;
Alasdair G Kergon4b832e82006-10-03 01:15:30 -07001048 atomic_set(&pe->ref_count, 0);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001049 pe->started = 0;
1050
Jonathan Brassow493df712009-04-02 19:55:31 +01001051 if (s->store->type->prepare_exception(s->store, &pe->e)) {
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001052 free_pending_exception(pe);
1053 return NULL;
1054 }
1055
Alasdair G Kergon4b832e82006-10-03 01:15:30 -07001056 get_pending_exception(pe);
Jon Brassow3510cb92009-12-10 23:52:11 +00001057 dm_insert_exception(&s->pending, &pe->e);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001058
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 return pe;
1060}
1061
Jon Brassow1d4989c2009-12-10 23:52:10 +00001062static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
Milan Brozd74f81f2008-02-08 02:11:27 +00001063 struct bio *bio, chunk_t chunk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064{
Mike Snitzerfc56f6f2009-12-10 23:52:12 +00001065 bio->bi_bdev = s->cow->bdev;
Jonathan Brassow71fab002009-04-02 19:55:33 +01001066 bio->bi_sector = chunk_to_sector(s->store,
1067 dm_chunk_number(e->new_chunk) +
1068 (chunk - e->old_chunk)) +
1069 (bio->bi_sector &
1070 s->store->chunk_mask);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071}
1072
1073static int snapshot_map(struct dm_target *ti, struct bio *bio,
1074 union map_info *map_context)
1075{
Jon Brassow1d4989c2009-12-10 23:52:10 +00001076 struct dm_exception *e;
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001077 struct dm_snapshot *s = ti->private;
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -08001078 int r = DM_MAPIO_REMAPPED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 chunk_t chunk;
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001080 struct dm_snap_pending_exception *pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081
Mikulas Patocka494b3ee2009-06-22 10:12:25 +01001082 if (unlikely(bio_empty_barrier(bio))) {
Mike Snitzerfc56f6f2009-12-10 23:52:12 +00001083 bio->bi_bdev = s->cow->bdev;
Mikulas Patocka494b3ee2009-06-22 10:12:25 +01001084 return DM_MAPIO_REMAPPED;
1085 }
1086
Jonathan Brassow71fab002009-04-02 19:55:33 +01001087 chunk = sector_to_chunk(s->store, bio->bi_sector);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088
1089 /* Full snapshots are not usable */
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001090 /* To get here the table must be live so s->active is always set. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 if (!s->valid)
Alasdair G Kergonf6a80ea2005-07-12 15:53:01 -07001092 return -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093
Alasdair G Kergonba40a2a2006-10-03 01:15:28 -07001094 /* FIXME: should only take write lock if we need
1095 * to copy an exception */
1096 down_write(&s->lock);
1097
1098 if (!s->valid) {
1099 r = -EIO;
1100 goto out_unlock;
1101 }
1102
1103 /* If the block is already remapped - use that, else remap it */
Jon Brassow3510cb92009-12-10 23:52:11 +00001104 e = dm_lookup_exception(&s->complete, chunk);
Alasdair G Kergonba40a2a2006-10-03 01:15:28 -07001105 if (e) {
Milan Brozd74f81f2008-02-08 02:11:27 +00001106 remap_exception(s, e, bio, chunk);
Alasdair G Kergonba40a2a2006-10-03 01:15:28 -07001107 goto out_unlock;
1108 }
1109
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 /*
1111 * Write to snapshot - higher level takes care of RW/RO
1112 * flags so we should only get this if we are
1113 * writeable.
1114 */
1115 if (bio_rw(bio) == WRITE) {
Mikulas Patocka29138082009-04-02 19:55:25 +01001116 pe = __lookup_pending_exception(s, chunk);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001117 if (!pe) {
Mikulas Patockac6621392009-04-02 19:55:25 +01001118 up_write(&s->lock);
1119 pe = alloc_pending_exception(s);
1120 down_write(&s->lock);
1121
1122 if (!s->valid) {
1123 free_pending_exception(pe);
1124 r = -EIO;
1125 goto out_unlock;
1126 }
1127
Jon Brassow3510cb92009-12-10 23:52:11 +00001128 e = dm_lookup_exception(&s->complete, chunk);
Mikulas Patocka35bf6592009-04-02 19:55:26 +01001129 if (e) {
1130 free_pending_exception(pe);
1131 remap_exception(s, e, bio, chunk);
1132 goto out_unlock;
1133 }
1134
Mikulas Patockac6621392009-04-02 19:55:25 +01001135 pe = __find_pending_exception(s, pe, chunk);
Mikulas Patocka29138082009-04-02 19:55:25 +01001136 if (!pe) {
1137 __invalidate_snapshot(s, -ENOMEM);
1138 r = -EIO;
1139 goto out_unlock;
1140 }
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001141 }
1142
Milan Brozd74f81f2008-02-08 02:11:27 +00001143 remap_exception(s, &pe->e, bio, chunk);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001144 bio_list_add(&pe->snapshot_bios, bio);
1145
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -08001146 r = DM_MAPIO_SUBMITTED;
Alasdair G Kergonba40a2a2006-10-03 01:15:28 -07001147
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001148 if (!pe->started) {
1149 /* this is protected by snap->lock */
1150 pe->started = 1;
Alasdair G Kergonba40a2a2006-10-03 01:15:28 -07001151 up_write(&s->lock);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001152 start_copy(pe);
Alasdair G Kergonba40a2a2006-10-03 01:15:28 -07001153 goto out;
1154 }
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001155 } else {
Alasdair G Kergonba40a2a2006-10-03 01:15:28 -07001156 bio->bi_bdev = s->origin->bdev;
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001157 map_context->ptr = track_chunk(s, chunk);
1158 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159
Alasdair G Kergonba40a2a2006-10-03 01:15:28 -07001160 out_unlock:
1161 up_write(&s->lock);
1162 out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 return r;
1164}
1165
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001166static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
1167 int error, union map_info *map_context)
1168{
1169 struct dm_snapshot *s = ti->private;
1170 struct dm_snap_tracked_chunk *c = map_context->ptr;
1171
1172 if (c)
1173 stop_tracking_chunk(s, c);
1174
1175 return 0;
1176}
1177
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178static void snapshot_resume(struct dm_target *ti)
1179{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001180 struct dm_snapshot *s = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -08001182 down_write(&s->lock);
1183 s->active = 1;
1184 up_write(&s->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185}
1186
1187static int snapshot_status(struct dm_target *ti, status_type_t type,
1188 char *result, unsigned int maxlen)
1189{
Jonathan Brassow2e4a31d2009-04-02 19:55:34 +01001190 unsigned sz = 0;
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001191 struct dm_snapshot *snap = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192
1193 switch (type) {
1194 case STATUSTYPE_INFO:
Mikulas Patocka94e765722009-12-10 23:51:53 +00001195
1196 down_write(&snap->lock);
1197
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 if (!snap->valid)
Jonathan Brassow2e4a31d2009-04-02 19:55:34 +01001199 DMEMIT("Invalid");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200 else {
Mike Snitzer985903b2009-12-10 23:52:11 +00001201 if (snap->store->type->usage) {
1202 sector_t total_sectors, sectors_allocated,
1203 metadata_sectors;
1204 snap->store->type->usage(snap->store,
1205 &total_sectors,
1206 &sectors_allocated,
1207 &metadata_sectors);
1208 DMEMIT("%llu/%llu %llu",
1209 (unsigned long long)sectors_allocated,
1210 (unsigned long long)total_sectors,
1211 (unsigned long long)metadata_sectors);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 }
1213 else
Jonathan Brassow2e4a31d2009-04-02 19:55:34 +01001214 DMEMIT("Unknown");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 }
Mikulas Patocka94e765722009-12-10 23:51:53 +00001216
1217 up_write(&snap->lock);
1218
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 break;
1220
1221 case STATUSTYPE_TABLE:
1222 /*
1223 * kdevname returns a static pointer so we need
1224 * to make private copies if the output is to
1225 * make sense.
1226 */
Mike Snitzerfc56f6f2009-12-10 23:52:12 +00001227 DMEMIT("%s %s", snap->origin->name, snap->cow->name);
Jonathan Brassow1e302a92009-04-02 19:55:35 +01001228 snap->store->type->status(snap->store, type, result + sz,
1229 maxlen - sz);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 break;
1231 }
1232
1233 return 0;
1234}
1235
Mike Snitzer8811f462009-09-04 20:40:19 +01001236static int snapshot_iterate_devices(struct dm_target *ti,
1237 iterate_devices_callout_fn fn, void *data)
1238{
1239 struct dm_snapshot *snap = ti->private;
1240
1241 return fn(ti, snap->origin, 0, ti->len, data);
1242}
1243
1244
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245/*-----------------------------------------------------------------
1246 * Origin methods
1247 *---------------------------------------------------------------*/
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248static int __origin_write(struct list_head *snapshots, struct bio *bio)
1249{
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -08001250 int r = DM_MAPIO_REMAPPED, first = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 struct dm_snapshot *snap;
Jon Brassow1d4989c2009-12-10 23:52:10 +00001252 struct dm_exception *e;
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001253 struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 chunk_t chunk;
Alasdair G Kergoneccf0812006-03-27 01:17:42 -08001255 LIST_HEAD(pe_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
1257 /* Do all the snapshots on this origin */
1258 list_for_each_entry (snap, snapshots, list) {
1259
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001260 down_write(&snap->lock);
1261
Alasdair G Kergonaa14ede2006-02-01 03:04:50 -08001262 /* Only deal with valid and active snapshots */
1263 if (!snap->valid || !snap->active)
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001264 goto next_snapshot;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265
Alasdair G Kergond5e404c2005-07-12 15:53:05 -07001266 /* Nothing to do if writing beyond end of snapshot */
Mike Snitzerfc56f6f2009-12-10 23:52:12 +00001267 if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001268 goto next_snapshot;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269
1270 /*
1271 * Remember, different snapshots can have
1272 * different chunk sizes.
1273 */
Jonathan Brassow71fab002009-04-02 19:55:33 +01001274 chunk = sector_to_chunk(snap->store, bio->bi_sector);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275
1276 /*
1277 * Check exception table to see if block
1278 * is already remapped in this snapshot
1279 * and trigger an exception if not.
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -08001280 *
Alasdair G Kergon4b832e82006-10-03 01:15:30 -07001281 * ref_count is initialised to 1 so pending_complete()
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -08001282 * won't destroy the primary_pe while we're inside this loop.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283 */
Jon Brassow3510cb92009-12-10 23:52:11 +00001284 e = dm_lookup_exception(&snap->complete, chunk);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001285 if (e)
1286 goto next_snapshot;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287
Mikulas Patocka29138082009-04-02 19:55:25 +01001288 pe = __lookup_pending_exception(snap, chunk);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001289 if (!pe) {
Mikulas Patockac6621392009-04-02 19:55:25 +01001290 up_write(&snap->lock);
1291 pe = alloc_pending_exception(snap);
1292 down_write(&snap->lock);
1293
1294 if (!snap->valid) {
1295 free_pending_exception(pe);
1296 goto next_snapshot;
1297 }
1298
Jon Brassow3510cb92009-12-10 23:52:11 +00001299 e = dm_lookup_exception(&snap->complete, chunk);
Mikulas Patocka35bf6592009-04-02 19:55:26 +01001300 if (e) {
1301 free_pending_exception(pe);
1302 goto next_snapshot;
1303 }
1304
Mikulas Patockac6621392009-04-02 19:55:25 +01001305 pe = __find_pending_exception(snap, pe, chunk);
Mikulas Patocka29138082009-04-02 19:55:25 +01001306 if (!pe) {
1307 __invalidate_snapshot(snap, -ENOMEM);
1308 goto next_snapshot;
1309 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 }
1311
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001312 if (!primary_pe) {
1313 /*
1314 * Either every pe here has same
1315 * primary_pe or none has one yet.
1316 */
1317 if (pe->primary_pe)
1318 primary_pe = pe->primary_pe;
1319 else {
1320 primary_pe = pe;
1321 first = 1;
1322 }
1323
1324 bio_list_add(&primary_pe->origin_bios, bio);
1325
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -08001326 r = DM_MAPIO_SUBMITTED;
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001327 }
1328
1329 if (!pe->primary_pe) {
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001330 pe->primary_pe = primary_pe;
Alasdair G Kergon4b832e82006-10-03 01:15:30 -07001331 get_pending_exception(primary_pe);
Alasdair G Kergon76df1c62006-03-27 01:17:45 -08001332 }
1333
1334 if (!pe->started) {
1335 pe->started = 1;
1336 list_add_tail(&pe->list, &pe_queue);
1337 }
1338
1339 next_snapshot:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 up_write(&snap->lock);
1341 }
1342
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -08001343 if (!primary_pe)
Alasdair G Kergon4b832e82006-10-03 01:15:30 -07001344 return r;
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -08001345
1346 /*
1347 * If this is the first time we're processing this chunk and
Alasdair G Kergon4b832e82006-10-03 01:15:30 -07001348 * ref_count is now 1 it means all the pending exceptions
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -08001349 * got completed while we were in the loop above, so it falls to
1350 * us here to remove the primary_pe and submit any origin_bios.
1351 */
1352
Alasdair G Kergon4b832e82006-10-03 01:15:30 -07001353 if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -08001354 flush_bios(bio_list_get(&primary_pe->origin_bios));
1355 free_pending_exception(primary_pe);
1356 /* If we got here, pe_queue is necessarily empty. */
Alasdair G Kergon4b832e82006-10-03 01:15:30 -07001357 return r;
Alasdair G Kergonb4b610f2006-03-27 01:17:44 -08001358 }
1359
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 /*
1361 * Now that we have a complete pe list we can start the copying.
1362 */
Alasdair G Kergoneccf0812006-03-27 01:17:42 -08001363 list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
1364 start_copy(pe);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365
1366 return r;
1367}
1368
1369/*
1370 * Called on a write from the origin driver.
1371 */
1372static int do_origin(struct dm_dev *origin, struct bio *bio)
1373{
1374 struct origin *o;
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -08001375 int r = DM_MAPIO_REMAPPED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376
1377 down_read(&_origins_lock);
1378 o = __lookup_origin(origin->bdev);
1379 if (o)
1380 r = __origin_write(&o->snapshots, bio);
1381 up_read(&_origins_lock);
1382
1383 return r;
1384}
1385
1386/*
1387 * Origin: maps a linear range of a device, with hooks for snapshotting.
1388 */
1389
1390/*
1391 * Construct an origin mapping: <dev_path>
1392 * The context for an origin is merely a 'struct dm_dev *'
1393 * pointing to the real device.
1394 */
1395static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1396{
1397 int r;
1398 struct dm_dev *dev;
1399
1400 if (argc != 1) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001401 ti->error = "origin: incorrect number of arguments";
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 return -EINVAL;
1403 }
1404
1405 r = dm_get_device(ti, argv[0], 0, ti->len,
1406 dm_table_get_mode(ti->table), &dev);
1407 if (r) {
1408 ti->error = "Cannot get target device";
1409 return r;
1410 }
1411
1412 ti->private = dev;
Mikulas Patocka494b3ee2009-06-22 10:12:25 +01001413 ti->num_flush_requests = 1;
1414
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415 return 0;
1416}
1417
1418static void origin_dtr(struct dm_target *ti)
1419{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001420 struct dm_dev *dev = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 dm_put_device(ti, dev);
1422}
1423
1424static int origin_map(struct dm_target *ti, struct bio *bio,
1425 union map_info *map_context)
1426{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001427 struct dm_dev *dev = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428 bio->bi_bdev = dev->bdev;
1429
Mikulas Patocka494b3ee2009-06-22 10:12:25 +01001430 if (unlikely(bio_empty_barrier(bio)))
1431 return DM_MAPIO_REMAPPED;
1432
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 /* Only tell snapshots if this is a write */
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -08001434 return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435}
1436
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437/*
1438 * Set the target "split_io" field to the minimum of all the snapshots'
1439 * chunk sizes.
1440 */
1441static void origin_resume(struct dm_target *ti)
1442{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001443 struct dm_dev *dev = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444
1445 down_read(&_origins_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446
Mikulas Patocka7e201b32009-12-10 23:52:08 +00001447 ti->split_io = __minimum_chunk_size(__lookup_origin(dev->bdev));
1448
1449 up_read(&_origins_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450}
1451
1452static int origin_status(struct dm_target *ti, status_type_t type, char *result,
1453 unsigned int maxlen)
1454{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001455 struct dm_dev *dev = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456
1457 switch (type) {
1458 case STATUSTYPE_INFO:
1459 result[0] = '\0';
1460 break;
1461
1462 case STATUSTYPE_TABLE:
1463 snprintf(result, maxlen, "%s", dev->name);
1464 break;
1465 }
1466
1467 return 0;
1468}
1469
Mike Snitzer8811f462009-09-04 20:40:19 +01001470static int origin_iterate_devices(struct dm_target *ti,
1471 iterate_devices_callout_fn fn, void *data)
1472{
1473 struct dm_dev *dev = ti->private;
1474
1475 return fn(ti, dev, 0, ti->len, data);
1476}
1477
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478static struct target_type origin_target = {
1479 .name = "snapshot-origin",
Mike Snitzer8811f462009-09-04 20:40:19 +01001480 .version = {1, 7, 0},
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 .module = THIS_MODULE,
1482 .ctr = origin_ctr,
1483 .dtr = origin_dtr,
1484 .map = origin_map,
1485 .resume = origin_resume,
1486 .status = origin_status,
Mike Snitzer8811f462009-09-04 20:40:19 +01001487 .iterate_devices = origin_iterate_devices,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488};
1489
1490static struct target_type snapshot_target = {
1491 .name = "snapshot",
Mike Snitzer985903b2009-12-10 23:52:11 +00001492 .version = {1, 8, 0},
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493 .module = THIS_MODULE,
1494 .ctr = snapshot_ctr,
1495 .dtr = snapshot_dtr,
1496 .map = snapshot_map,
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001497 .end_io = snapshot_end_io,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 .resume = snapshot_resume,
1499 .status = snapshot_status,
Mike Snitzer8811f462009-09-04 20:40:19 +01001500 .iterate_devices = snapshot_iterate_devices,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501};
1502
1503static int __init dm_snapshot_init(void)
1504{
1505 int r;
1506
Alasdair G Kergon4db6bfe2009-01-06 03:05:17 +00001507 r = dm_exception_store_init();
1508 if (r) {
1509 DMERR("Failed to initialize exception stores");
1510 return r;
1511 }
1512
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513 r = dm_register_target(&snapshot_target);
1514 if (r) {
1515 DMERR("snapshot target register failed %d", r);
Jonathan Brassow034a1862009-10-16 23:18:14 +01001516 goto bad_register_snapshot_target;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 }
1518
1519 r = dm_register_target(&origin_target);
1520 if (r < 0) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001521 DMERR("Origin target register failed %d", r);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 goto bad1;
1523 }
1524
1525 r = init_origin_hash();
1526 if (r) {
1527 DMERR("init_origin_hash failed.");
1528 goto bad2;
1529 }
1530
Jon Brassow1d4989c2009-12-10 23:52:10 +00001531 exception_cache = KMEM_CACHE(dm_exception, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 if (!exception_cache) {
1533 DMERR("Couldn't create exception cache.");
1534 r = -ENOMEM;
1535 goto bad3;
1536 }
1537
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001538 pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 if (!pending_cache) {
1540 DMERR("Couldn't create pending cache.");
1541 r = -ENOMEM;
1542 goto bad4;
1543 }
1544
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001545 tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
1546 if (!tracked_chunk_cache) {
1547 DMERR("Couldn't create cache to track chunks in use.");
1548 r = -ENOMEM;
1549 goto bad5;
1550 }
1551
Alasdair G Kergonca3a9312006-10-03 01:15:30 -07001552 ksnapd = create_singlethread_workqueue("ksnapd");
1553 if (!ksnapd) {
1554 DMERR("Failed to create ksnapd workqueue.");
1555 r = -ENOMEM;
Mikulas Patocka92e86812008-07-21 12:00:35 +01001556 goto bad_pending_pool;
Alasdair G Kergonca3a9312006-10-03 01:15:30 -07001557 }
1558
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559 return 0;
1560
Alasdair G Kergon4db6bfe2009-01-06 03:05:17 +00001561bad_pending_pool:
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001562 kmem_cache_destroy(tracked_chunk_cache);
Alasdair G Kergon4db6bfe2009-01-06 03:05:17 +00001563bad5:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 kmem_cache_destroy(pending_cache);
Alasdair G Kergon4db6bfe2009-01-06 03:05:17 +00001565bad4:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566 kmem_cache_destroy(exception_cache);
Alasdair G Kergon4db6bfe2009-01-06 03:05:17 +00001567bad3:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 exit_origin_hash();
Alasdair G Kergon4db6bfe2009-01-06 03:05:17 +00001569bad2:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 dm_unregister_target(&origin_target);
Alasdair G Kergon4db6bfe2009-01-06 03:05:17 +00001571bad1:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 dm_unregister_target(&snapshot_target);
Jonathan Brassow034a1862009-10-16 23:18:14 +01001573
1574bad_register_snapshot_target:
1575 dm_exception_store_exit();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 return r;
1577}
1578
1579static void __exit dm_snapshot_exit(void)
1580{
Alasdair G Kergonca3a9312006-10-03 01:15:30 -07001581 destroy_workqueue(ksnapd);
1582
Mikulas Patocka10d3bd02009-01-06 03:04:58 +00001583 dm_unregister_target(&snapshot_target);
1584 dm_unregister_target(&origin_target);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585
1586 exit_origin_hash();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 kmem_cache_destroy(pending_cache);
1588 kmem_cache_destroy(exception_cache);
Mikulas Patockacd45daf2008-07-21 12:00:32 +01001589 kmem_cache_destroy(tracked_chunk_cache);
Alasdair G Kergon4db6bfe2009-01-06 03:05:17 +00001590
1591 dm_exception_store_exit();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592}
1593
1594/* Module hooks */
1595module_init(dm_snapshot_init);
1596module_exit(dm_snapshot_exit);
1597
1598MODULE_DESCRIPTION(DM_NAME " snapshot target");
1599MODULE_AUTHOR("Joe Thornber");
1600MODULE_LICENSE("GPL");