/*
   drbd.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */

#include "drbd_vli.h"

struct after_state_chg_work {
	struct drbd_work w;
	union drbd_state os;
	union drbd_state ns;
	enum chg_state_flags flags;
	struct completion *done;
};

static DEFINE_MUTEX(drbd_main_mutex);
int drbdd_init(struct drbd_thread *);
int drbd_worker(struct drbd_thread *);
int drbd_asender(struct drbd_thread *);

int drbd_init(void);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static int drbd_release(struct gendisk *gd, fmode_t mode);
static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags);
static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static void _tl_clear(struct drbd_conf *mdev);

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* allow_open_on_secondary */
MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
 * this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(cn_idx, uint, 0444);
module_param(proc_details, int, 0644);

#ifdef CONFIG_DRBD_FAULT_INJECTION
int enable_faults;
int fault_rate;
static int fault_count;
int fault_devs;
/* bitmap of enabled faults */
module_param(enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param(fault_rate, int, 0664);
/* count of faults inserted */
module_param(fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param(fault_devs, int, 0644);
#endif

/* module parameter, defined */
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
bool disable_sendpage;
bool allow_oos;
unsigned int cn_idx = CN_IDX_DRBD;
int proc_details;	/* Detail level in proc drbd*/

/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char usermode_helper[80] = "/sbin/drbdadm";

module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);
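
/*
 * Illustrative only, not part of the driver: how these parameters are
 * typically set.  Built as a module they are given at load time; built into
 * the kernel, the macros above turn them into "drbd."-prefixed boot
 * parameters (see the comment above module_param(minor_count, ...)).
 * The values below are made-up examples:
 *
 *	modprobe drbd minor_count=16 usermode_helper=/usr/sbin/drbdadm
 *	(boot parameter form)  drbd.minor_count=16 drbd.allow_oos=0
 */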

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
struct drbd_conf **minor_table;

struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache;	/* epoch entries */
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;

/* I do not use a standard mempool, because:
   1) I want to hand out the pre-allocated objects first.
   2) I want to be able to interrupt sleeping allocation with a signal.
   Note: This is a single linked list, the next pointer is the private
	 member of struct page.
 */
struct page *drbd_pp_pool;
spinlock_t   drbd_pp_lock;
int          drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;

DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);

static const struct block_device_operations drbd_ops = {
	.owner =   THIS_MODULE,
	.open =    drbd_open,
	.release = drbd_release,
};

static void bio_destructor_drbd(struct bio *bio)
{
	bio_free(bio, drbd_md_io_bio_set);
}

struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
	struct bio *bio;

	if (!drbd_md_io_bio_set)
		return bio_alloc(gfp_mask, 1);

	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
	if (!bio)
		return NULL;
	bio->bi_destructor = bio_destructor_drbd;
	return bio;
}

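/*
 * Sketch only, not part of the driver: how a caller such as the metadata
 * I/O path (drbd_actlog.c) might use bio_alloc_drbd().  The sector, page,
 * size, rw and my_endio names below are placeholders, not real identifiers:
 *
 *	struct bio *bio = bio_alloc_drbd(GFP_NOIO);
 *	if (!bio)
 *		return -ENOMEM;
 *	bio->bi_bdev = mdev->ldev->md_bdev;
 *	bio->bi_sector = sector;
 *	bio_add_page(bio, page, size, 0);
 *	bio->bi_end_io = my_endio;
 *	bio->bi_private = mdev;
 *	submit_bio(rw, bio);
 */
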
#ifdef __CHECKER__
/* When checking with sparse, and this is an inline function, sparse will
   give tons of false positives. When this is a real function, sparse works.
 */
int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
{
	int io_allowed;

	atomic_inc(&mdev->local_cnt);
	io_allowed = (mdev->state.disk >= mins);
	if (!io_allowed) {
		if (atomic_dec_and_test(&mdev->local_cnt))
			wake_up(&mdev->misc_wait);
	}
	return io_allowed;
}

#endif

/**
 * DOC: The transfer log
 *
 * The transfer log is a singly linked list of &struct drbd_tl_epoch objects.
 * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
 * of the list. There is always at least one &struct drbd_tl_epoch object.
 *
 * Each &struct drbd_tl_epoch has a circular, doubly linked list of requests
 * attached.
 */
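
/*
 * Sketch only: the layout described above is what _tl_restart() and
 * tl_abort_disk_io() below walk, roughly like this (names as used there):
 *
 *	struct drbd_tl_epoch *b;
 *	struct list_head *le, *tle;
 *	struct drbd_request *req;
 *
 *	for (b = mdev->oldest_tle; b; b = b->next)
 *		list_for_each_safe(le, tle, &b->requests) {
 *			req = list_entry(le, struct drbd_request, tl_requests);
 *			... act on req ...
 *		}
 */
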
static int tl_init(struct drbd_conf *mdev)
{
	struct drbd_tl_epoch *b;

	/* during device minor initialization, we may well use GFP_KERNEL */
	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
	if (!b)
		return 0;
	INIT_LIST_HEAD(&b->requests);
	INIT_LIST_HEAD(&b->w.list);
	b->next = NULL;
	b->br_number = 4711;
	b->n_writes = 0;
	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */

	mdev->oldest_tle = b;
	mdev->newest_tle = b;
	INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
	INIT_LIST_HEAD(&mdev->barrier_acked_requests);

	mdev->tl_hash = NULL;
	mdev->tl_hash_s = 0;

	return 1;
}

static void tl_cleanup(struct drbd_conf *mdev)
{
	D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
	kfree(mdev->oldest_tle);
	mdev->oldest_tle = NULL;
	kfree(mdev->unused_spare_tle);
	mdev->unused_spare_tle = NULL;
	kfree(mdev->tl_hash);
	mdev->tl_hash = NULL;
	mdev->tl_hash_s = 0;
}

/**
 * _tl_add_barrier() - Adds a barrier to the transfer log
 * @mdev:	DRBD device.
 * @new:	Barrier to be added before the current head of the TL.
 *
 * The caller must hold the req_lock.
 */
void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
{
	struct drbd_tl_epoch *newest_before;

	INIT_LIST_HEAD(&new->requests);
	INIT_LIST_HEAD(&new->w.list);
	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
	new->next = NULL;
	new->n_writes = 0;

	newest_before = mdev->newest_tle;
	new->br_number = newest_before->br_number+1;
	if (mdev->newest_tle != new) {
		mdev->newest_tle->next = new;
		mdev->newest_tle = new;
	}
}

/**
 * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
 * @mdev:	DRBD device.
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
 * &struct drbd_tl_epoch object, this function will cause a termination
 * of the connection.
 */
void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
		unsigned int set_size)
{
	struct drbd_tl_epoch *b, *nob; /* next old barrier */
	struct list_head *le, *tle;
	struct drbd_request *r;

	spin_lock_irq(&mdev->req_lock);

	b = mdev->oldest_tle;

	/* first some paranoia code */
	if (b == NULL) {
		dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
			barrier_nr);
		goto bail;
	}
	if (b->br_number != barrier_nr) {
		dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
			barrier_nr, b->br_number);
		goto bail;
	}
	if (b->n_writes != set_size) {
		dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
			barrier_nr, set_size, b->n_writes);
		goto bail;
	}

	/* Clean up list of requests processed during current epoch */
	list_for_each_safe(le, tle, &b->requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		_req_mod(r, barrier_acked);
	}
	/* There could be requests on the list waiting for completion
	   of the write to the local disk. To avoid corruption of
	   slab's data structures we have to remove the list's head.

	   Also there could have been a barrier ack out of sequence, overtaking
	   the write acks - which would be a bug and violate write ordering.
	   To not deadlock in case we lose connection while such requests are
	   still pending, we need some way to find them for the
	   _req_mod(connection_lost_while_pending).

	   These have been list_move'd to the out_of_sequence_requests list in
	   _req_mod(, barrier_acked) above.
	   */
	list_splice_init(&b->requests, &mdev->barrier_acked_requests);

	nob = b->next;
	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		_tl_add_barrier(mdev, b);
		if (nob)
			mdev->oldest_tle = nob;
		/* if nob == NULL b was the only barrier, and becomes the new
		   barrier. Therefore mdev->oldest_tle points already to b */
	} else {
		D_ASSERT(nob != NULL);
		mdev->oldest_tle = nob;
		kfree(b);
	}

	spin_unlock_irq(&mdev->req_lock);
	dec_ap_pending(mdev);

	return;

bail:
	spin_unlock_irq(&mdev->req_lock);
	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
}


/**
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @mdev:	DRBD device.
 * @what:	The action/event to perform with all request objects
 *
 * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
 * restart_frozen_disk_io.
 */
static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
{
	struct drbd_tl_epoch *b, *tmp, **pn;
	struct list_head *le, *tle, carry_reads;
	struct drbd_request *req;
	int rv, n_writes, n_reads;

	b = mdev->oldest_tle;
	pn = &mdev->oldest_tle;
	while (b) {
		n_writes = 0;
		n_reads = 0;
		INIT_LIST_HEAD(&carry_reads);
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			rv = _req_mod(req, what);

			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
		}
		tmp = b->next;

		if (n_writes) {
			if (what == resend) {
				b->n_writes = n_writes;
				if (b->w.cb == NULL) {
					b->w.cb = w_send_barrier;
					inc_ap_pending(mdev);
					set_bit(CREATE_BARRIER, &mdev->flags);
				}

				drbd_queue_work(&mdev->data.work, &b->w);
			}
			pn = &b->next;
		} else {
			if (n_reads)
				list_add(&carry_reads, &b->requests);
			/* there could still be requests on that ring list,
			 * in case local io is still pending */
			list_del(&b->requests);

			/* dec_ap_pending corresponding to queue_barrier.
			 * the newest barrier may not have been queued yet,
			 * in which case w.cb is still NULL. */
			if (b->w.cb != NULL)
				dec_ap_pending(mdev);

			if (b == mdev->newest_tle) {
				/* recycle, but reinit! */
				D_ASSERT(tmp == NULL);
				INIT_LIST_HEAD(&b->requests);
				list_splice(&carry_reads, &b->requests);
				INIT_LIST_HEAD(&b->w.list);
				b->w.cb = NULL;
				b->br_number = net_random();
				b->n_writes = 0;

				*pn = b;
				break;
			}
			*pn = tmp;
			kfree(b);
		}
		b = tmp;
		list_splice(&carry_reads, &b->requests);
	}

	/* Actions operating on the disk state also want to work on
	   requests that got barrier acked. */

	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
		req = list_entry(le, struct drbd_request, tl_requests);
		_req_mod(req, what);
	}
}


/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
 * @mdev:	DRBD device.
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer log gets marked as out of sync. Called from the
 * receiver thread and the worker thread.
 */
void tl_clear(struct drbd_conf *mdev)
{
	spin_lock_irq(&mdev->req_lock);
	_tl_clear(mdev);
	spin_unlock_irq(&mdev->req_lock);
}

static void _tl_clear(struct drbd_conf *mdev)
{
	struct list_head *le, *tle;
	struct drbd_request *r;

	_tl_restart(mdev, connection_lost_while_pending);

	/* we expect this list to be empty. */
	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));

	/* but just in case, clean it up anyways! */
	list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		/* It would be nice to complete outside of spinlock.
		 * But this is easier for now. */
		_req_mod(r, connection_lost_while_pending);
	}

	/* ensure bit indicating barrier is required is clear */
	clear_bit(CREATE_BARRIER, &mdev->flags);

	memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));

}

void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
{
	spin_lock_irq(&mdev->req_lock);
	_tl_restart(mdev, what);
	spin_unlock_irq(&mdev->req_lock);
}

/**
 * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL
 * @mdev:	DRBD device.
 */
void tl_abort_disk_io(struct drbd_conf *mdev)
{
	struct drbd_tl_epoch *b;
	struct list_head *le, *tle;
	struct drbd_request *req;

	spin_lock_irq(&mdev->req_lock);
	b = mdev->oldest_tle;
	while (b) {
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			if (!(req->rq_state & RQ_LOCAL_PENDING))
				continue;
			_req_mod(req, abort_disk_io);
		}
		b = b->next;
	}

	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
		req = list_entry(le, struct drbd_request, tl_requests);
		if (!(req->rq_state & RQ_LOCAL_PENDING))
			continue;
		_req_mod(req, abort_disk_io);
	}

	spin_unlock_irq(&mdev->req_lock);
}

/**
 * cl_wide_st_chg() - true if the state change is a cluster wide one
 * @mdev:	DRBD device.
 * @os:		old (current) state.
 * @ns:		new (wanted) state.
 */
static int cl_wide_st_chg(struct drbd_conf *mdev,
			  union drbd_state os, union drbd_state ns)
{
	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
		  (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
}

enum drbd_state_rv
drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
		  union drbd_state mask, union drbd_state val)
{
	unsigned long flags;
	union drbd_state os, ns;
	enum drbd_state_rv rv;

	spin_lock_irqsave(&mdev->req_lock, flags);
	os = mdev->state;
	ns.i = (os.i & ~mask.i) | val.i;
	rv = _drbd_set_state(mdev, ns, f, NULL);
	ns = mdev->state;
	spin_unlock_irqrestore(&mdev->req_lock, flags);

	return rv;
}

/**
 * drbd_force_state() - Impose a change which happens outside our control on our state
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 */
void drbd_force_state(struct drbd_conf *mdev,
	union drbd_state mask, union drbd_state val)
{
	drbd_change_state(mdev, CS_HARD, mask, val);
}
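
/*
 * Sketch only: how callers typically request state changes.  The NS() macro
 * (drbd_int.h) builds the mask/val pair for a single state field; the first
 * call below appears verbatim in tl_release() above, the second is an
 * illustrative variation:
 *
 *	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
 *	drbd_force_state(mdev, NS(disk, D_FAILED));
 */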

static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
						    union drbd_state,
						    union drbd_state);
enum sanitize_state_warnings {
	NO_WARNING,
	ABORTED_ONLINE_VERIFY,
	ABORTED_RESYNC,
	CONNECTION_LOST_NEGOTIATING,
	IMPLICITLY_UPGRADED_DISK,
	IMPLICITLY_UPGRADED_PDSK,
};
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
				       union drbd_state ns, enum sanitize_state_warnings *warn);
int drbd_send_state_req(struct drbd_conf *,
			union drbd_state, union drbd_state);

static enum drbd_state_rv
_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
	     union drbd_state val)
{
	union drbd_state os, ns;
	unsigned long flags;
	enum drbd_state_rv rv;

	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
		return SS_CW_SUCCESS;

	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
		return SS_CW_FAILED_BY_PEER;

	rv = 0;
	spin_lock_irqsave(&mdev->req_lock, flags);
	os = mdev->state;
	ns.i = (os.i & ~mask.i) | val.i;
	ns = sanitize_state(mdev, os, ns, NULL);

	if (!cl_wide_st_chg(mdev, os, ns))
		rv = SS_CW_NO_NEED;
	if (!rv) {
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS) {
			rv = is_valid_state_transition(mdev, ns, os);
			if (rv == SS_SUCCESS)
				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
		}
	}
	spin_unlock_irqrestore(&mdev->req_lock, flags);

	return rv;
}

/**
 * drbd_req_state() - Perform a possibly cluster-wide state change
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:		flags
 *
 * Should not be called directly, use drbd_request_state() or
 * _drbd_request_state().
 */
static enum drbd_state_rv
drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
	       union drbd_state val, enum chg_state_flags f)
{
	struct completion done;
	unsigned long flags;
	union drbd_state os, ns;
	enum drbd_state_rv rv;

	init_completion(&done);

	if (f & CS_SERIALIZE)
		mutex_lock(&mdev->state_mutex);

	spin_lock_irqsave(&mdev->req_lock, flags);
	os = mdev->state;
	ns.i = (os.i & ~mask.i) | val.i;
	ns = sanitize_state(mdev, os, ns, NULL);

	if (cl_wide_st_chg(mdev, os, ns)) {
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS)
			rv = is_valid_state_transition(mdev, ns, os);
		spin_unlock_irqrestore(&mdev->req_lock, flags);

		if (rv < SS_SUCCESS) {
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

		drbd_state_lock(mdev);
		if (!drbd_send_state_req(mdev, mask, val)) {
			drbd_state_unlock(mdev);
			rv = SS_CW_FAILED_BY_PEER;
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

		wait_event(mdev->state_wait,
			(rv = _req_st_cond(mdev, mask, val)));

		if (rv < SS_SUCCESS) {
			drbd_state_unlock(mdev);
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}
		spin_lock_irqsave(&mdev->req_lock, flags);
		os = mdev->state;
		ns.i = (os.i & ~mask.i) | val.i;
		rv = _drbd_set_state(mdev, ns, f, &done);
		drbd_state_unlock(mdev);
	} else {
		rv = _drbd_set_state(mdev, ns, f, &done);
	}

	spin_unlock_irqrestore(&mdev->req_lock, flags);

	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
		D_ASSERT(current != mdev->worker.task);
		wait_for_completion(&done);
	}

abort:
	if (f & CS_SERIALIZE)
		mutex_unlock(&mdev->state_mutex);

	return rv;
}

/**
 * _drbd_request_state() - Request a state change (with flags)
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:		flags
 *
 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
 * flag, or when logging of failed state change requests is not desired.
 */
enum drbd_state_rv
_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
		    union drbd_state val, enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	wait_event(mdev->state_wait,
		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);

	return rv;
}
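
/*
 * Sketch only: a typical caller, modelled on abw_start_sync() further down.
 * drbd_request_state() (drbd_int.h) is the wrapper that also logs refused
 * requests; the error handling shown here is illustrative:
 *
 *	enum drbd_state_rv rv;
 *
 *	rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
 *	if (rv < SS_SUCCESS)
 *		... the state change was refused ...
 */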

static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
{
	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
	    name,
	    drbd_conn_str(ns.conn),
	    drbd_role_str(ns.role),
	    drbd_role_str(ns.peer),
	    drbd_disk_str(ns.disk),
	    drbd_disk_str(ns.pdsk),
	    is_susp(ns) ? 's' : 'r',
	    ns.aftr_isp ? 'a' : '-',
	    ns.peer_isp ? 'p' : '-',
	    ns.user_isp ? 'u' : '-'
	    );
}

void print_st_err(struct drbd_conf *mdev, union drbd_state os,
		  union drbd_state ns, enum drbd_state_rv err)
{
	if (err == SS_IN_TRANSIENT_STATE)
		return;
	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
	print_st(mdev, " state", os);
	print_st(mdev, "wanted", ns);
}


/**
 * is_valid_state() - Returns an SS_ error code if ns is not valid
 * @mdev:	DRBD device.
 * @ns:		State to consider.
 */
static enum drbd_state_rv
is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
{
	/* See drbd_state_sw_errors in drbd_strings.c */

	enum drbd_fencing_p fp;
	enum drbd_state_rv rv = SS_SUCCESS;

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	if (get_net_conf(mdev)) {
		if (!mdev->net_conf->two_primaries &&
		    ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
			rv = SS_TWO_PRIMARIES;
		put_net_conf(mdev);
	}

	if (rv <= 0)
		/* already found a reason to abort */;
	else if (ns.role == R_SECONDARY && mdev->open_cnt)
		rv = SS_DEVICE_IN_USE;

	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (fp >= FP_RESOURCE &&
		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
		rv = SS_PRIMARY_NOP;

	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
		rv = SS_NO_LOCAL_DISK;

	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
		rv = SS_NO_REMOTE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if ((ns.conn == C_CONNECTED ||
		  ns.conn == C_WF_BITMAP_S ||
		  ns.conn == C_SYNC_SOURCE ||
		  ns.conn == C_PAUSED_SYNC_S) &&
		  ns.disk == D_OUTDATED)
		rv = SS_CONNECTED_OUTDATES;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		 (mdev->sync_conf.verify_alg[0] == 0))
		rv = SS_NO_VERIFY_ALG;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		  mdev->agreed_pro_version < 88)
		rv = SS_NOT_SUPPORTED;

	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
		rv = SS_CONNECTED_OUTDATES;

	return rv;
}

/**
 * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
 * @mdev:	DRBD device.
 * @ns:		new state.
 * @os:		old state.
 */
static enum drbd_state_rv
is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
			  union drbd_state os)
{
	enum drbd_state_rv rv = SS_SUCCESS;

	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
	    os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
		rv = SS_ALREADY_STANDALONE;

	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
		rv = SS_IS_DISKLESS;

	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
		rv = SS_NO_NET_CONFIG;

	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
		rv = SS_LOWER_THAN_OUTDATED;

	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
		rv = SS_IN_TRANSIENT_STATE;

	if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
		rv = SS_IN_TRANSIENT_STATE;

	/* While establishing a connection only allow cstate to change.
	   Delay/refuse role changes, detach attach etc... */
	if (test_bit(STATE_SENT, &mdev->flags) &&
	    !(os.conn == C_WF_REPORT_PARAMS ||
	      (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
		rv = SS_IN_TRANSIENT_STATE;

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
	    ns.conn != os.conn && os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
	    os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
	    && os.conn < C_WF_REPORT_PARAMS)
		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */

	return rv;
}

static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
{
	static const char *msg_table[] = {
		[NO_WARNING] = "",
		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
		[ABORTED_RESYNC] = "Resync aborted.",
		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
	};

	if (warn != NO_WARNING)
		dev_warn(DEV, "%s\n", msg_table[warn]);
}

/**
 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
 * @mdev:	DRBD device.
 * @os:		old state.
 * @ns:		new state.
 * @warn:	if not NULL, receives an &enum sanitize_state_warnings for the caller to report.
 *
 * When we lose connection, we have to set the state of the peer's disk (pdsk)
 * to D_UNKNOWN. This rule and many more along those lines are in this function.
 */
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
				       union drbd_state ns, enum sanitize_state_warnings *warn)
{
	enum drbd_fencing_p fp;
	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;

	if (warn)
		*warn = NO_WARNING;

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* Disallow Network errors to configure a device's network part */
	if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
	    os.conn <= C_DISCONNECTING)
		ns.conn = os.conn;

	/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
	 * If you try to go into some Sync* state, that shall fail (elsewhere). */
	if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED)
		ns.conn = os.conn;

	/* we cannot fail (again) if we already detached */
	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
		ns.disk = D_DISKLESS;

	/* After C_DISCONNECTING only C_STANDALONE may follow */
	if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
		ns.conn = os.conn;

	if (ns.conn < C_CONNECTED) {
		ns.peer_isp = 0;
		ns.peer = R_UNKNOWN;
		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
			ns.pdsk = D_UNKNOWN;
	}

	/* Clear the aftr_isp when becoming unconfigured */
	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
		ns.aftr_isp = 0;

	/* Abort resync if a disk fails/detaches */
	if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
	    (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
		if (warn)
			*warn = os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
		ns.conn = C_CONNECTED;
	}

	/* Connection breaks down before we finished "Negotiating" */
	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
			ns.disk = mdev->new_state_tmp.disk;
			ns.pdsk = mdev->new_state_tmp.pdsk;
		} else {
			if (warn)
				*warn = CONNECTION_LOST_NEGOTIATING;
			ns.disk = D_DISKLESS;
			ns.pdsk = D_UNKNOWN;
		}
		put_ldev(mdev);
	}

	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
			ns.disk = D_UP_TO_DATE;
		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
			ns.pdsk = D_UP_TO_DATE;
	}

	/* Implications of the connection state on the disk states */
	disk_min = D_DISKLESS;
	disk_max = D_UP_TO_DATE;
	pdsk_min = D_INCONSISTENT;
	pdsk_max = D_UNKNOWN;
	switch ((enum drbd_conns)ns.conn) {
	case C_WF_BITMAP_T:
	case C_PAUSED_SYNC_T:
	case C_STARTING_SYNC_T:
	case C_WF_SYNC_UUID:
	case C_BEHIND:
		disk_min = D_INCONSISTENT;
		disk_max = D_OUTDATED;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_VERIFY_S:
	case C_VERIFY_T:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_CONNECTED:
		disk_min = D_DISKLESS;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_DISKLESS;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_WF_BITMAP_S:
	case C_PAUSED_SYNC_S:
	case C_STARTING_SYNC_S:
	case C_AHEAD:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
		break;
	case C_SYNC_TARGET:
		disk_min = D_INCONSISTENT;
		disk_max = D_INCONSISTENT;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_SYNC_SOURCE:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_INCONSISTENT;
		break;
	case C_STANDALONE:
	case C_DISCONNECTING:
	case C_UNCONNECTED:
	case C_TIMEOUT:
	case C_BROKEN_PIPE:
	case C_NETWORK_FAILURE:
	case C_PROTOCOL_ERROR:
	case C_TEAR_DOWN:
	case C_WF_CONNECTION:
	case C_WF_REPORT_PARAMS:
	case C_MASK:
		break;
	}
	if (ns.disk > disk_max)
		ns.disk = disk_max;

	if (ns.disk < disk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_DISK;
		ns.disk = disk_min;
	}
	if (ns.pdsk > pdsk_max)
		ns.pdsk = pdsk_max;

	if (ns.pdsk < pdsk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_PDSK;
		ns.pdsk = pdsk_min;
	}

	if (fp == FP_STONITH &&
	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */

	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */

	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
		if (ns.conn == C_SYNC_SOURCE)
			ns.conn = C_PAUSED_SYNC_S;
		if (ns.conn == C_SYNC_TARGET)
			ns.conn = C_PAUSED_SYNC_T;
	} else {
		if (ns.conn == C_PAUSED_SYNC_S)
			ns.conn = C_SYNC_SOURCE;
		if (ns.conn == C_PAUSED_SYNC_T)
			ns.conn = C_SYNC_TARGET;
	}

	return ns;
}
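
/*
 * Sketch only, illustrating the rule from the kernel-doc above: when the
 * requested transition drops the connection below C_CONNECTED, the peer
 * fields are forced to "unknown" even though the caller did not ask for it.
 * The states are example values:
 *
 *	os = { conn:C_CONNECTED,       pdsk:D_UP_TO_DATE, ... }
 *	ns = { conn:C_NETWORK_FAILURE, pdsk:D_UP_TO_DATE, ... }   (requested)
 *	ns = sanitize_state(mdev, os, ns, NULL);
 *	    now ns.peer == R_UNKNOWN and ns.pdsk == D_UNKNOWN
 */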
1090
1091/* helper for __drbd_set_state */
1092static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
1093{
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001094 if (mdev->agreed_pro_version < 90)
1095 mdev->ov_start_sector = 0;
1096 mdev->rs_total = drbd_bm_bits(mdev);
1097 mdev->ov_position = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001098 if (cs == C_VERIFY_T) {
1099 /* starting online verify from an arbitrary position
1100 * does not fit well into the existing protocol.
1101 * on C_VERIFY_T, we initialize ov_left and friends
1102 * implicitly in receive_DataRequest once the
1103 * first P_OV_REQUEST is received */
1104 mdev->ov_start_sector = ~(sector_t)0;
1105 } else {
1106 unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001107 if (bit >= mdev->rs_total) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001108 mdev->ov_start_sector =
1109 BM_BIT_TO_SECT(mdev->rs_total - 1);
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001110 mdev->rs_total = 1;
1111 } else
1112 mdev->rs_total -= bit;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001113 mdev->ov_position = mdev->ov_start_sector;
1114 }
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001115 mdev->ov_left = mdev->rs_total;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001116}
1117
Philipp Reisner07782862010-08-31 12:00:50 +02001118static void drbd_resume_al(struct drbd_conf *mdev)
1119{
1120 if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
1121 dev_info(DEV, "Resumed AL updates\n");
1122}
1123
Philipp Reisnerb411b362009-09-25 16:07:19 -07001124/**
1125 * __drbd_set_state() - Set a new DRBD state
1126 * @mdev: DRBD device.
1127 * @ns: new state.
1128 * @flags: Flags
1129 * @done: Optional completion, that will get completed after the after_state_ch() finished
1130 *
1131 * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
1132 */
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01001133enum drbd_state_rv
1134__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1135 enum chg_state_flags flags, struct completion *done)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001136{
1137 union drbd_state os;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01001138 enum drbd_state_rv rv = SS_SUCCESS;
Philipp Reisner77e8fdf2011-06-29 10:49:13 +02001139 enum sanitize_state_warnings ssw;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001140 struct after_state_chg_work *ascw;
1141
1142 os = mdev->state;
1143
Philipp Reisner77e8fdf2011-06-29 10:49:13 +02001144 ns = sanitize_state(mdev, os, ns, &ssw);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001145
1146 if (ns.i == os.i)
1147 return SS_NOTHING_TO_DO;
1148
1149 if (!(flags & CS_HARD)) {
1150 /* pre-state-change checks ; only look at ns */
1151 /* See drbd_state_sw_errors in drbd_strings.c */
1152
1153 rv = is_valid_state(mdev, ns);
1154 if (rv < SS_SUCCESS) {
1155 /* If the old state was illegal as well, then let
1156 this happen...*/
1157
Philipp Reisner1616a252010-06-10 16:55:15 +02001158 if (is_valid_state(mdev, os) == rv)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001159 rv = is_valid_state_transition(mdev, ns, os);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001160 } else
1161 rv = is_valid_state_transition(mdev, ns, os);
1162 }
1163
1164 if (rv < SS_SUCCESS) {
1165 if (flags & CS_VERBOSE)
1166 print_st_err(mdev, os, ns, rv);
1167 return rv;
1168 }
1169
Philipp Reisner77e8fdf2011-06-29 10:49:13 +02001170 print_sanitize_warnings(mdev, ssw);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001171
1172 {
Andreas Gruenbacher662d91a2010-12-07 03:01:41 +01001173 char *pbp, pb[300];
1174 pbp = pb;
1175 *pbp = 0;
1176 if (ns.role != os.role)
1177 pbp += sprintf(pbp, "role( %s -> %s ) ",
1178 drbd_role_str(os.role),
1179 drbd_role_str(ns.role));
1180 if (ns.peer != os.peer)
1181 pbp += sprintf(pbp, "peer( %s -> %s ) ",
1182 drbd_role_str(os.peer),
1183 drbd_role_str(ns.peer));
1184 if (ns.conn != os.conn)
1185 pbp += sprintf(pbp, "conn( %s -> %s ) ",
1186 drbd_conn_str(os.conn),
1187 drbd_conn_str(ns.conn));
1188 if (ns.disk != os.disk)
1189 pbp += sprintf(pbp, "disk( %s -> %s ) ",
1190 drbd_disk_str(os.disk),
1191 drbd_disk_str(ns.disk));
1192 if (ns.pdsk != os.pdsk)
1193 pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
1194 drbd_disk_str(os.pdsk),
1195 drbd_disk_str(ns.pdsk));
1196 if (is_susp(ns) != is_susp(os))
1197 pbp += sprintf(pbp, "susp( %d -> %d ) ",
1198 is_susp(os),
1199 is_susp(ns));
1200 if (ns.aftr_isp != os.aftr_isp)
1201 pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
1202 os.aftr_isp,
1203 ns.aftr_isp);
1204 if (ns.peer_isp != os.peer_isp)
1205 pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
1206 os.peer_isp,
1207 ns.peer_isp);
1208 if (ns.user_isp != os.user_isp)
1209 pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
1210 os.user_isp,
1211 ns.user_isp);
1212 dev_info(DEV, "%s\n", pb);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001213 }
1214
1215 /* solve the race between becoming unconfigured,
1216 * worker doing the cleanup, and
1217 * admin reconfiguring us:
1218 * on (re)configure, first set CONFIG_PENDING,
1219 * then wait for a potentially exiting worker,
1220 * start the worker, and schedule one no_op.
1221 * then proceed with configuration.
1222 */
1223 if (ns.disk == D_DISKLESS &&
1224 ns.conn == C_STANDALONE &&
1225 ns.role == R_SECONDARY &&
1226 !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
1227 set_bit(DEVICE_DYING, &mdev->flags);
1228
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001229 /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
1230 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
1231 * drbd_ldev_destroy() won't happen before our corresponding
1232 * after_state_ch works run, where we put_ldev again. */
1233 if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
1234 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
1235 atomic_inc(&mdev->local_cnt);
1236
1237 mdev->state = ns;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01001238
1239 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
1240 drbd_print_uuids(mdev, "attached to UUIDs");
1241
Philipp Reisnerb411b362009-09-25 16:07:19 -07001242 wake_up(&mdev->misc_wait);
1243 wake_up(&mdev->state_wait);
1244
Philipp Reisnerb411b362009-09-25 16:07:19 -07001245 /* aborted verify run. log the last position */
1246 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
1247 ns.conn < C_CONNECTED) {
1248 mdev->ov_start_sector =
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001249 BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001250 dev_info(DEV, "Online Verify reached sector %llu\n",
1251 (unsigned long long)mdev->ov_start_sector);
1252 }
1253
1254 if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
1255 (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) {
1256 dev_info(DEV, "Syncer continues.\n");
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001257 mdev->rs_paused += (long)jiffies
1258 -(long)mdev->rs_mark_time[mdev->rs_last_mark];
Philipp Reisner63106d32010-09-01 15:47:15 +02001259 if (ns.conn == C_SYNC_TARGET)
1260 mod_timer(&mdev->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001261 }
1262
1263 if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) &&
1264 (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
1265 dev_info(DEV, "Resync suspended\n");
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001266 mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001267 }
1268
1269 if (os.conn == C_CONNECTED &&
1270 (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001271 unsigned long now = jiffies;
1272 int i;
1273
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001274 set_ov_position(mdev, ns.conn);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001275 mdev->rs_start = now;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001276 mdev->rs_last_events = 0;
1277 mdev->rs_last_sect_ev = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278 mdev->ov_last_oos_size = 0;
1279 mdev->ov_last_oos_start = 0;
1280
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001281 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001282 mdev->rs_mark_left[i] = mdev->ov_left;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001283 mdev->rs_mark_time[i] = now;
1284 }
1285
Lars Ellenberg2649f082010-11-05 10:05:47 +01001286 drbd_rs_controller_reset(mdev);
1287
Philipp Reisnerb411b362009-09-25 16:07:19 -07001288 if (ns.conn == C_VERIFY_S) {
1289 dev_info(DEV, "Starting Online Verify from sector %llu\n",
1290 (unsigned long long)mdev->ov_position);
1291 mod_timer(&mdev->resync_timer, jiffies);
1292 }
1293 }
1294
1295 if (get_ldev(mdev)) {
1296 u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
1297 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
1298 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
1299
1300 if (test_bit(CRASHED_PRIMARY, &mdev->flags))
1301 mdf |= MDF_CRASHED_PRIMARY;
1302 if (mdev->state.role == R_PRIMARY ||
1303 (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
1304 mdf |= MDF_PRIMARY_IND;
1305 if (mdev->state.conn > C_WF_REPORT_PARAMS)
1306 mdf |= MDF_CONNECTED_IND;
1307 if (mdev->state.disk > D_INCONSISTENT)
1308 mdf |= MDF_CONSISTENT;
1309 if (mdev->state.disk > D_OUTDATED)
1310 mdf |= MDF_WAS_UP_TO_DATE;
1311 if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
1312 mdf |= MDF_PEER_OUT_DATED;
1313 if (mdf != mdev->ldev->md.flags) {
1314 mdev->ldev->md.flags = mdf;
1315 drbd_md_mark_dirty(mdev);
1316 }
1317 if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
1318 drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
1319 put_ldev(mdev);
1320 }
1321
1322 /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
1323 if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
1324 os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
1325 set_bit(CONSIDER_RESYNC, &mdev->flags);
1326
1327 /* Receiver should clean up itself */
1328 if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
1329 drbd_thread_stop_nowait(&mdev->receiver);
1330
1331 /* Now the receiver finished cleaning up itself, it should die */
1332 if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
1333 drbd_thread_stop_nowait(&mdev->receiver);
1334
1335 /* Upon network failure, we need to restart the receiver. */
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001336 if (os.conn > C_WF_CONNECTION &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07001337 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
1338 drbd_thread_restart_nowait(&mdev->receiver);
1339
Philipp Reisner07782862010-08-31 12:00:50 +02001340 /* Resume AL writing if we get a connection */
1341 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1342 drbd_resume_al(mdev);
1343
Lars Ellenbergba280c02012-04-25 11:46:14 +02001344 /* remember last connect and attach times so request_timer_fn() won't
1345 * kill newly established sessions while we are still trying to thaw
1346 * previously frozen IO */
1347 if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS)
1348 mdev->last_reconnect_jif = jiffies;
1349 if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
1350 ns.disk > D_NEGOTIATING)
1351 mdev->last_reattach_jif = jiffies;
1352
Philipp Reisnerb411b362009-09-25 16:07:19 -07001353 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
1354 if (ascw) {
1355 ascw->os = os;
1356 ascw->ns = ns;
1357 ascw->flags = flags;
1358 ascw->w.cb = w_after_state_ch;
1359 ascw->done = done;
1360 drbd_queue_work(&mdev->data.work, &ascw->w);
1361 } else {
1362 dev_warn(DEV, "Could not kmalloc an ascw\n");
1363 }
1364
1365 return rv;
1366}
1367
1368static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1369{
1370 struct after_state_chg_work *ascw =
1371 container_of(w, struct after_state_chg_work, w);
1372 after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
1373 if (ascw->flags & CS_WAIT_COMPLETE) {
1374 D_ASSERT(ascw->done != NULL);
1375 complete(ascw->done);
1376 }
1377 kfree(ascw);
1378
1379 return 1;
1380}
1381
1382static void abw_start_sync(struct drbd_conf *mdev, int rv)
1383{
1384 if (rv) {
1385 dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
1386 _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
1387 return;
1388 }
1389
1390 switch (mdev->state.conn) {
1391 case C_STARTING_SYNC_T:
1392 _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
1393 break;
1394 case C_STARTING_SYNC_S:
1395 drbd_start_resync(mdev, C_SYNC_SOURCE);
1396 break;
1397 }
1398}
1399
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001400int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
1401 int (*io_fn)(struct drbd_conf *),
1402 char *why, enum bm_flag flags)
Lars Ellenberg19f843a2010-12-15 08:59:11 +01001403{
1404 int rv;
1405
1406 D_ASSERT(current == mdev->worker.task);
1407
1408 /* open coded non-blocking drbd_suspend_io(mdev); */
1409 set_bit(SUSPEND_IO, &mdev->flags);
Lars Ellenberg19f843a2010-12-15 08:59:11 +01001410
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001411 drbd_bm_lock(mdev, why, flags);
Lars Ellenberg19f843a2010-12-15 08:59:11 +01001412 rv = io_fn(mdev);
1413 drbd_bm_unlock(mdev);
1414
1415 drbd_resume_io(mdev);
1416
1417 return rv;
1418}
1419
Philipp Reisnerb411b362009-09-25 16:07:19 -07001420/**
1421 * after_state_ch() - Perform after state change actions that may sleep
1422 * @mdev: DRBD device.
1423 * @os: old state.
1424 * @ns: new state.
1425 * @flags: Flags
1426 */
1427static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1428 union drbd_state ns, enum chg_state_flags flags)
1429{
1430 enum drbd_fencing_p fp;
Philipp Reisner67098932010-06-24 16:24:25 +02001431 enum drbd_req_event what = nothing;
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001432 union drbd_state nsm = (union drbd_state){ .i = -1 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07001433
1434 if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
1435 clear_bit(CRASHED_PRIMARY, &mdev->flags);
1436 if (mdev->p_uuid)
1437 mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
1438 }
1439
1440 fp = FP_DONT_CARE;
1441 if (get_ldev(mdev)) {
1442 fp = mdev->ldev->dc.fencing;
1443 put_ldev(mdev);
1444 }
1445
1446 /* Inform userspace about the change... */
1447 drbd_bcast_state(mdev, ns);
1448
1449 if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
1450 (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
1451 drbd_khelper(mdev, "pri-on-incon-degr");
1452
1453 /* Here we have the actions that are performed after a
1454 state change. This function might sleep */
1455
Philipp Reisnerdfa8bed2011-06-29 14:06:08 +02001456 if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING)
1457 mod_timer(&mdev->request_timer, jiffies + HZ);
1458
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001459 nsm.i = -1;
1460 if (ns.susp_nod) {
Philipp Reisner3f986882010-12-20 14:48:20 +01001461 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1462 what = resend;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001463
Philipp Reisner79f16f52011-07-15 18:44:26 +02001464 if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
1465 ns.disk > D_NEGOTIATING)
Philipp Reisner3f986882010-12-20 14:48:20 +01001466 what = restart_frozen_disk_io;
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001467
Philipp Reisner3f986882010-12-20 14:48:20 +01001468 if (what != nothing)
1469 nsm.susp_nod = 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001470 }
1471
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001472 if (ns.susp_fen) {
Philipp Reisner43a51822010-06-11 11:26:34 +02001473 /* case1: The outdate peer handler is successful: */
1474 if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
Philipp Reisner43a51822010-06-11 11:26:34 +02001475 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
1476 drbd_uuid_new_current(mdev);
1477 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner43a51822010-06-11 11:26:34 +02001478 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479 spin_lock_irq(&mdev->req_lock);
Philipp Reisner509fc012012-07-31 11:22:58 +02001480 _tl_clear(mdev);
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001481 _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482 spin_unlock_irq(&mdev->req_lock);
1483 }
Philipp Reisner43a51822010-06-11 11:26:34 +02001484 /* case2: The connection was established again: */
1485 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
1486 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner67098932010-06-24 16:24:25 +02001487 what = resend;
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001488 nsm.susp_fen = 0;
Philipp Reisner43a51822010-06-11 11:26:34 +02001489 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490 }
Philipp Reisner67098932010-06-24 16:24:25 +02001491
1492 if (what != nothing) {
1493 spin_lock_irq(&mdev->req_lock);
1494 _tl_restart(mdev, what);
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001495 nsm.i &= mdev->state.i;
1496 _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
Philipp Reisner67098932010-06-24 16:24:25 +02001497 spin_unlock_irq(&mdev->req_lock);
1498 }
1499
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001500 /* Became sync source. With protocol >= 96, we still need to send out
1501 * the sync uuid now. Need to do that before any drbd_send_state, or
1502 * the other side may go "paused sync" before receiving the sync uuids,
1503 * which is unexpected. */
1504 if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
1505 (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
1506 mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
1507 drbd_gen_and_send_sync_uuid(mdev);
1508 put_ldev(mdev);
1509 }
1510
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511 /* Do not change the order of the if above and the two below... */
1512 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
Lars Ellenberg0029d622012-06-14 18:02:52 +02001513 /* we probably will start a resync soon.
1514 * make sure those things are properly reset. */
1515 mdev->rs_total = 0;
1516 mdev->rs_failed = 0;
1517 atomic_set(&mdev->rs_pending_cnt, 0);
1518 drbd_rs_cancel_all(mdev);
1519
Philipp Reisnerb411b362009-09-25 16:07:19 -07001520 drbd_send_uuids(mdev);
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001521 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001522 }
Lars Ellenberg54b956a2011-01-20 10:47:53 +01001523 /* No point in queuing send_bitmap if we don't have a connection
1524 * anymore, so check also the _current_ state, not only the new state
1525 * at the time this work was queued. */
1526 if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
1527 mdev->state.conn == C_WF_BITMAP_S)
1528 drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001529 "send_bitmap (WFBitMapS)",
1530 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001531
 1532	/* Lost contact with the peer's copy of the data */
1533 if ((os.pdsk >= D_INCONSISTENT &&
1534 os.pdsk != D_UNKNOWN &&
1535 os.pdsk != D_OUTDATED)
1536 && (ns.pdsk < D_INCONSISTENT ||
1537 ns.pdsk == D_UNKNOWN ||
1538 ns.pdsk == D_OUTDATED)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001539 if (get_ldev(mdev)) {
1540 if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
Philipp Reisner2c8d1962010-05-25 14:32:03 +02001541 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001542 if (is_susp(mdev->state)) {
Philipp Reisner43a51822010-06-11 11:26:34 +02001543 set_bit(NEW_CUR_UUID, &mdev->flags);
1544 } else {
1545 drbd_uuid_new_current(mdev);
1546 drbd_send_uuids(mdev);
1547 }
Philipp Reisner2c8d1962010-05-25 14:32:03 +02001548 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549 put_ldev(mdev);
1550 }
1551 }
1552
1553 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
Philipp Reisnerbca482e2011-07-15 12:14:27 +02001554 if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
1555 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
Philipp Reisner2c8d1962010-05-25 14:32:03 +02001556 drbd_uuid_new_current(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02001557 drbd_send_uuids(mdev);
1558 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559 /* D_DISKLESS Peer becomes secondary */
1560 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001561 /* We may still be Primary ourselves.
1562 * No harm done if the bitmap still changes,
1563 * redirtied pages will follow later. */
1564 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1565 "demote diskless peer", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg19f843a2010-12-15 08:59:11 +01001566 put_ldev(mdev);
1567 }
1568
Lars Ellenberg06d33e92010-12-18 17:00:59 +01001569 /* Write out all changed bits on demote.
 1570	 * Though, no need to do that just yet
1571 * if there is a resync going on still */
1572 if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
1573 mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001574 /* No changes to the bitmap expected this time, so assert that,
1575 * even though no harm was done if it did change. */
1576 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1577 "demote", BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001578 put_ldev(mdev);
1579 }
1580
1581 /* Last part of the attaching process ... */
1582 if (ns.conn >= C_CONNECTED &&
1583 os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01001584 drbd_send_sizes(mdev, 0, 0); /* to start sync... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001585 drbd_send_uuids(mdev);
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001586 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001587 }
1588
1589 /* We want to pause/continue resync, tell peer. */
1590 if (ns.conn >= C_CONNECTED &&
1591 ((os.aftr_isp != ns.aftr_isp) ||
1592 (os.user_isp != ns.user_isp)))
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001593 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001594
1595 /* In case one of the isp bits got set, suspend other devices. */
1596 if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
1597 (ns.aftr_isp || ns.peer_isp || ns.user_isp))
1598 suspend_other_sg(mdev);
1599
 1600	/* Make sure the peer gets informed about possible state
1601 changes (ISP bits) while we were in WFReportParams. */
1602 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001603 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001604
Philipp Reisner67531712010-10-27 12:21:30 +02001605 if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001606 drbd_send_state(mdev, ns);
Philipp Reisner67531712010-10-27 12:21:30 +02001607
Philipp Reisnerb411b362009-09-25 16:07:19 -07001608	/* We are in the process of starting a full sync... */
1609 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
1610 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001611 /* no other bitmap changes expected during this phase */
1612 drbd_queue_bitmap_io(mdev,
1613 &drbd_bmio_set_n_write, &abw_start_sync,
1614 "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001615
 1616	/* We are invalidating ourselves... */
1617 if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
1618 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001619 /* other bitmap operation expected during this phase */
1620 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
1621 "set_n_write from invalidate", BM_LOCKED_MASK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001622
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001623 /* first half of local IO error, failure to attach,
1624 * or administrative detach */
1625 if (os.disk != D_FAILED && ns.disk == D_FAILED) {
Philipp Reisner7caacb62011-12-14 18:01:21 +01001626 enum drbd_io_error_p eh = EP_PASS_ON;
1627 int was_io_error = 0;
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001628 /* corresponding get_ldev was in __drbd_set_state, to serialize
Philipp Reisner7caacb62011-12-14 18:01:21 +01001629 * our cleanup here with the transition to D_DISKLESS.
 1630	 * But it is still not safe to dereference ldev here, since
 1631	 * we might come from a failed Attach before ldev was set. */
1632 if (mdev->ldev) {
1633 eh = mdev->ldev->dc.on_io_error;
1634 was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001635
Lars Ellenberg63a6d0b2012-06-18 11:46:01 +02001636 if (was_io_error && eh == EP_CALL_HELPER)
1637 drbd_khelper(mdev, "local-io-error");
1638
Lars Ellenberg383606e2012-06-14 14:21:32 +02001639 /* Immediately allow completion of all application IO,
1640 * that waits for completion from the local disk,
1641 * if this was a force-detach due to disk_timeout
1642 * or administrator request (drbdsetup detach --force).
1643 * Do NOT abort otherwise.
1644 * Aborting local requests may cause serious problems,
1645 * if requests are completed to upper layers already,
1646 * and then later the already submitted local bio completes.
1647 * This can cause DMA into former bio pages that meanwhile
1648 * have been re-used for other things.
1649 * So aborting local requests may cause crashes,
1650 * or even worse, silent data corruption.
1651 */
1652 if (test_and_clear_bit(FORCE_DETACH, &mdev->flags))
1653 tl_abort_disk_io(mdev);
Philipp Reisner2b4dd362011-03-14 13:01:50 +01001654
Philipp Reisner7caacb62011-12-14 18:01:21 +01001655 /* current state still has to be D_FAILED,
1656 * there is only one way out: to D_DISKLESS,
1657 * and that may only happen after our put_ldev below. */
1658 if (mdev->state.disk != D_FAILED)
1659 dev_err(DEV,
1660 "ASSERT FAILED: disk is %s during detach\n",
1661 drbd_disk_str(mdev->state.disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662
Philipp Reisner7caacb62011-12-14 18:01:21 +01001663 if (ns.conn >= C_CONNECTED)
1664 drbd_send_state(mdev, ns);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001665
Philipp Reisner7caacb62011-12-14 18:01:21 +01001666 drbd_rs_cancel_all(mdev);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001667
Philipp Reisner7caacb62011-12-14 18:01:21 +01001668 /* In case we want to get something to stable storage still,
1669 * this may be the last chance.
1670 * Following put_ldev may transition to D_DISKLESS. */
1671 drbd_md_sync(mdev);
1672 }
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001673 put_ldev(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001674 }
1675
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001676 /* second half of local IO error, failure to attach,
1677 * or administrative detach,
1678 * after local_cnt references have reached zero again */
1679 if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
1680 /* We must still be diskless,
1681 * re-attach has to be serialized with this! */
1682 if (mdev->state.disk != D_DISKLESS)
1683 dev_err(DEV,
1684 "ASSERT FAILED: disk is %s while going diskless\n",
1685 drbd_disk_str(mdev->state.disk));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001686
Philipp Reisner4afc4332011-12-13 10:31:32 +01001687 if (ns.conn >= C_CONNECTED)
1688 drbd_send_state(mdev, ns);
1689
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001690 /* corresponding get_ldev in __drbd_set_state
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001691 * this may finally trigger drbd_ldev_destroy. */
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001692 put_ldev(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001693 }
1694
Philipp Reisner738a84b2011-03-03 00:21:30 +01001695	/* Notify peer that I had a local IO error and did not detach. */
Philipp Reisner4afc4332011-12-13 10:31:32 +01001696 if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001697 drbd_send_state(mdev, ns);
Philipp Reisner738a84b2011-03-03 00:21:30 +01001698
Philipp Reisnerb411b362009-09-25 16:07:19 -07001699 /* Disks got bigger while they were detached */
1700 if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
1701 test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
1702 if (ns.conn == C_CONNECTED)
1703 resync_after_online_grow(mdev);
1704 }
1705
1706 /* A resync finished or aborted, wake paused devices... */
1707 if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
1708 (os.peer_isp && !ns.peer_isp) ||
1709 (os.user_isp && !ns.user_isp))
1710 resume_next_sg(mdev);
1711
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001712 /* sync target done with resync. Explicitly notify peer, even though
 1713	 * it should (at least for non-empty resyncs) already know this itself. */
1714 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001715 drbd_send_state(mdev, ns);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001716
Philipp Reisner197296f2012-03-26 16:47:11 +02001717	/* Wake up role changes that were delayed while the connection was being established */
1718 if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
1719 clear_bit(STATE_SENT, &mdev->flags);
1720 wake_up(&mdev->state_wait);
1721 }
1722
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001723 /* This triggers bitmap writeout of potentially still unwritten pages
1724 * if the resync finished cleanly, or aborted because of peer disk
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001725 * failure, or because of connection loss.
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001726 * For resync aborted because of local disk failure, we cannot do
1727 * any bitmap writeout anymore.
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001728 * No harm done if some bits change during this phase.
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001729 */
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001730 if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
Lars Ellenberg0e8488a2012-04-25 23:06:45 +02001731 drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL,
1732 "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001733 put_ldev(mdev);
1734 }
Lars Ellenberg02851e92010-12-16 14:47:39 +01001735
Philipp Reisnerf70b35112010-06-24 14:34:40 +02001736	/* free tl_hash if we got thawed and are C_STANDALONE */
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001737 if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02001738 drbd_free_tl_hash(mdev);
1739
Philipp Reisnerb411b362009-09-25 16:07:19 -07001740 /* Upon network connection, we need to start the receiver */
1741 if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
1742 drbd_thread_start(&mdev->receiver);
1743
1744 /* Terminate worker thread if we are unconfigured - it will be
1745 restarted as needed... */
1746 if (ns.disk == D_DISKLESS &&
1747 ns.conn == C_STANDALONE &&
1748 ns.role == R_SECONDARY) {
1749 if (os.aftr_isp != ns.aftr_isp)
1750 resume_next_sg(mdev);
1751 /* set in __drbd_set_state, unless CONFIG_PENDING was set */
1752 if (test_bit(DEVICE_DYING, &mdev->flags))
1753 drbd_thread_stop_nowait(&mdev->worker);
1754 }
1755
1756 drbd_md_sync(mdev);
1757}
1758
1759
1760static int drbd_thread_setup(void *arg)
1761{
1762 struct drbd_thread *thi = (struct drbd_thread *) arg;
1763 struct drbd_conf *mdev = thi->mdev;
1764 unsigned long flags;
1765 int retval;
1766
1767restart:
1768 retval = thi->function(thi);
1769
1770 spin_lock_irqsave(&thi->t_lock, flags);
1771
1772 /* if the receiver has been "Exiting", the last thing it did
1773 * was set the conn state to "StandAlone",
1774 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
1775 * and receiver thread will be "started".
1776 * drbd_thread_start needs to set "Restarting" in that case.
1777 * t_state check and assignment needs to be within the same spinlock,
1778 * so either thread_start sees Exiting, and can remap to Restarting,
 1779	 * or thread_start sees None, and can proceed as normal.
1780 */
1781
1782 if (thi->t_state == Restarting) {
1783 dev_info(DEV, "Restarting %s\n", current->comm);
1784 thi->t_state = Running;
1785 spin_unlock_irqrestore(&thi->t_lock, flags);
1786 goto restart;
1787 }
1788
1789 thi->task = NULL;
1790 thi->t_state = None;
1791 smp_mb();
1792 complete(&thi->stop);
1793 spin_unlock_irqrestore(&thi->t_lock, flags);
1794
1795 dev_info(DEV, "Terminating %s\n", current->comm);
1796
1797 /* Release mod reference taken when thread was started */
1798 module_put(THIS_MODULE);
1799 return retval;
1800}
1801
1802static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi,
1803 int (*func) (struct drbd_thread *))
1804{
1805 spin_lock_init(&thi->t_lock);
1806 thi->task = NULL;
1807 thi->t_state = None;
1808 thi->function = func;
1809 thi->mdev = mdev;
1810}
1811
1812int drbd_thread_start(struct drbd_thread *thi)
1813{
1814 struct drbd_conf *mdev = thi->mdev;
1815 struct task_struct *nt;
1816 unsigned long flags;
1817
1818 const char *me =
1819 thi == &mdev->receiver ? "receiver" :
1820 thi == &mdev->asender ? "asender" :
1821 thi == &mdev->worker ? "worker" : "NONSENSE";
1822
1823 /* is used from state engine doing drbd_thread_stop_nowait,
1824 * while holding the req lock irqsave */
1825 spin_lock_irqsave(&thi->t_lock, flags);
1826
1827 switch (thi->t_state) {
1828 case None:
1829 dev_info(DEV, "Starting %s thread (from %s [%d])\n",
1830 me, current->comm, current->pid);
1831
1832 /* Get ref on module for thread - this is released when thread exits */
1833 if (!try_module_get(THIS_MODULE)) {
1834 dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
1835 spin_unlock_irqrestore(&thi->t_lock, flags);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001836 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001837 }
1838
1839 init_completion(&thi->stop);
1840 D_ASSERT(thi->task == NULL);
1841 thi->reset_cpu_mask = 1;
1842 thi->t_state = Running;
1843 spin_unlock_irqrestore(&thi->t_lock, flags);
1844 flush_signals(current); /* otherw. may get -ERESTARTNOINTR */
1845
1846 nt = kthread_create(drbd_thread_setup, (void *) thi,
1847 "drbd%d_%s", mdev_to_minor(mdev), me);
1848
1849 if (IS_ERR(nt)) {
1850 dev_err(DEV, "Couldn't start thread\n");
1851
1852 module_put(THIS_MODULE);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001853 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001854 }
1855 spin_lock_irqsave(&thi->t_lock, flags);
1856 thi->task = nt;
1857 thi->t_state = Running;
1858 spin_unlock_irqrestore(&thi->t_lock, flags);
1859 wake_up_process(nt);
1860 break;
1861 case Exiting:
1862 thi->t_state = Restarting;
1863 dev_info(DEV, "Restarting %s thread (from %s [%d])\n",
1864 me, current->comm, current->pid);
1865 /* fall through */
1866 case Running:
1867 case Restarting:
1868 default:
1869 spin_unlock_irqrestore(&thi->t_lock, flags);
1870 break;
1871 }
1872
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001873 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001874}
1875
1876
1877void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
1878{
1879 unsigned long flags;
1880
1881 enum drbd_thread_state ns = restart ? Restarting : Exiting;
1882
1883 /* may be called from state engine, holding the req lock irqsave */
1884 spin_lock_irqsave(&thi->t_lock, flags);
1885
1886 if (thi->t_state == None) {
1887 spin_unlock_irqrestore(&thi->t_lock, flags);
1888 if (restart)
1889 drbd_thread_start(thi);
1890 return;
1891 }
1892
1893 if (thi->t_state != ns) {
1894 if (thi->task == NULL) {
1895 spin_unlock_irqrestore(&thi->t_lock, flags);
1896 return;
1897 }
1898
1899 thi->t_state = ns;
1900 smp_mb();
1901 init_completion(&thi->stop);
1902 if (thi->task != current)
1903 force_sig(DRBD_SIGKILL, thi->task);
1904
1905 }
1906
1907 spin_unlock_irqrestore(&thi->t_lock, flags);
1908
1909 if (wait)
1910 wait_for_completion(&thi->stop);
1911}
1912
1913#ifdef CONFIG_SMP
1914/**
1915 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
1916 * @mdev: DRBD device.
1917 *
1918 * Forces all threads of a device onto the same CPU. This is beneficial for
 1919	 * DRBD's performance. May be overridden by the user's configuration.
1920 */
1921void drbd_calc_cpu_mask(struct drbd_conf *mdev)
1922{
1923 int ord, cpu;
1924
1925 /* user override. */
1926 if (cpumask_weight(mdev->cpu_mask))
1927 return;
1928
1929 ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask);
1930 for_each_online_cpu(cpu) {
1931 if (ord-- == 0) {
1932 cpumask_set_cpu(cpu, mdev->cpu_mask);
1933 return;
1934 }
1935 }
1936 /* should not be reached */
1937 cpumask_setall(mdev->cpu_mask);
1938}
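
/*
 * Illustrative sketch (not part of the original source): the loop above
 * implements "device minor modulo number of online CPUs".  The hypothetical
 * helper below computes just that ordinal, e.g. minor 5 with 4 online CPUs
 * yields ordinal 1, i.e. the second online CPU.
 */
static unsigned int __maybe_unused example_cpu_ordinal(unsigned int minor,
						       unsigned int online_cpus)
{
	/* guard the degenerate case instead of dividing by zero */
	return online_cpus ? minor % online_cpus : 0;
}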
1939
1940/**
1941 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
1942 * @mdev: DRBD device.
1943 *
1944 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
1945 * prematurely.
1946 */
1947void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
1948{
1949 struct task_struct *p = current;
1950 struct drbd_thread *thi =
1951 p == mdev->asender.task ? &mdev->asender :
1952 p == mdev->receiver.task ? &mdev->receiver :
1953 p == mdev->worker.task ? &mdev->worker :
1954 NULL;
1955 ERR_IF(thi == NULL)
1956 return;
1957 if (!thi->reset_cpu_mask)
1958 return;
1959 thi->reset_cpu_mask = 0;
1960 set_cpus_allowed_ptr(p, mdev->cpu_mask);
1961}
1962#endif
1963
1964/* the appropriate socket mutex must be held already */
1965int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
Philipp Reisner0b70a132010-08-20 13:36:10 +02001966 enum drbd_packets cmd, struct p_header80 *h,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001967 size_t size, unsigned msg_flags)
1968{
1969 int sent, ok;
1970
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001971 ERR_IF(!h) return false;
1972 ERR_IF(!size) return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001973
1974 h->magic = BE_DRBD_MAGIC;
1975 h->command = cpu_to_be16(cmd);
Philipp Reisner0b70a132010-08-20 13:36:10 +02001976 h->length = cpu_to_be16(size-sizeof(struct p_header80));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001977
Philipp Reisnerb411b362009-09-25 16:07:19 -07001978 sent = drbd_send(mdev, sock, h, size, msg_flags);
1979
1980 ok = (sent == size);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001981 if (!ok && !signal_pending(current))
1982 dev_warn(DEV, "short sent %s size=%d sent=%d\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001983 cmdname(cmd), (int)size, sent);
1984 return ok;
1985}
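
/*
 * Illustrative sketch (not part of the original source): on the wire a
 * p_header80 is a 32 bit magic followed by a 16 bit command and a 16 bit
 * length (the payload size), all big endian, as the assignments above show.
 * The hypothetical helper below writes that layout into a plain byte buffer
 * (assumed to hold at least 8 bytes) purely to make the byte order explicit.
 */
static void __maybe_unused example_pack_header80(unsigned char *buf,
		u32 magic, u16 command, u16 length)
{
	buf[0] = magic >> 24;  buf[1] = magic >> 16;
	buf[2] = magic >> 8;   buf[3] = magic;
	buf[4] = command >> 8; buf[5] = command;
	buf[6] = length >> 8;  buf[7] = length;
}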
1986
1987/* don't pass the socket. we may only look at it
1988 * when we hold the appropriate socket mutex.
1989 */
1990int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
Philipp Reisner0b70a132010-08-20 13:36:10 +02001991 enum drbd_packets cmd, struct p_header80 *h, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001992{
1993 int ok = 0;
1994 struct socket *sock;
1995
1996 if (use_data_socket) {
1997 mutex_lock(&mdev->data.mutex);
1998 sock = mdev->data.socket;
1999 } else {
2000 mutex_lock(&mdev->meta.mutex);
2001 sock = mdev->meta.socket;
2002 }
2003
2004 /* drbd_disconnect() could have called drbd_free_sock()
2005 * while we were waiting in down()... */
2006 if (likely(sock != NULL))
2007 ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
2008
2009 if (use_data_socket)
2010 mutex_unlock(&mdev->data.mutex);
2011 else
2012 mutex_unlock(&mdev->meta.mutex);
2013 return ok;
2014}
2015
2016int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
2017 size_t size)
2018{
Philipp Reisner0b70a132010-08-20 13:36:10 +02002019 struct p_header80 h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002020 int ok;
2021
2022 h.magic = BE_DRBD_MAGIC;
2023 h.command = cpu_to_be16(cmd);
2024 h.length = cpu_to_be16(size);
2025
2026 if (!drbd_get_data_sock(mdev))
2027 return 0;
2028
Philipp Reisnerb411b362009-09-25 16:07:19 -07002029 ok = (sizeof(h) ==
2030 drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0));
2031 ok = ok && (size ==
2032 drbd_send(mdev, mdev->data.socket, data, size, 0));
2033
2034 drbd_put_data_sock(mdev);
2035
2036 return ok;
2037}
2038
2039int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
2040{
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002041 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002042 struct socket *sock;
2043 int size, rv;
2044 const int apv = mdev->agreed_pro_version;
2045
2046 size = apv <= 87 ? sizeof(struct p_rs_param)
2047 : apv == 88 ? sizeof(struct p_rs_param)
2048 + strlen(mdev->sync_conf.verify_alg) + 1
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002049 : apv <= 94 ? sizeof(struct p_rs_param_89)
2050 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002051
2052 /* used from admin command context and receiver/worker context.
2053 * to avoid kmalloc, grab the socket right here,
2054 * then use the pre-allocated sbuf there */
2055 mutex_lock(&mdev->data.mutex);
2056 sock = mdev->data.socket;
2057
2058 if (likely(sock != NULL)) {
2059 enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
2060
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002061 p = &mdev->data.sbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002062
2063 /* initialize verify_alg and csums_alg */
2064 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2065
2066 p->rate = cpu_to_be32(sc->rate);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002067 p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead);
2068 p->c_delay_target = cpu_to_be32(sc->c_delay_target);
2069 p->c_fill_target = cpu_to_be32(sc->c_fill_target);
2070 p->c_max_rate = cpu_to_be32(sc->c_max_rate);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002071
2072 if (apv >= 88)
2073 strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
2074 if (apv >= 89)
2075 strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
2076
2077 rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
2078 } else
2079 rv = 0; /* not ok */
2080
2081 mutex_unlock(&mdev->data.mutex);
2082
2083 return rv;
2084}
2085
2086int drbd_send_protocol(struct drbd_conf *mdev)
2087{
2088 struct p_protocol *p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002089 int size, cf, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002090
2091 size = sizeof(struct p_protocol);
2092
2093 if (mdev->agreed_pro_version >= 87)
2094 size += strlen(mdev->net_conf->integrity_alg) + 1;
2095
2096 /* we must not recurse into our own queue,
2097 * as that is blocked during handshake */
2098 p = kmalloc(size, GFP_NOIO);
2099 if (p == NULL)
2100 return 0;
2101
2102 p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol);
2103 p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p);
2104 p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p);
2105 p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002106 p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
2107
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002108 cf = 0;
2109 if (mdev->net_conf->want_lose)
2110 cf |= CF_WANT_LOSE;
2111 if (mdev->net_conf->dry_run) {
2112 if (mdev->agreed_pro_version >= 92)
2113 cf |= CF_DRY_RUN;
2114 else {
2115 dev_err(DEV, "--dry-run is not supported by peer");
Dan Carpenter7ac314c2010-04-22 14:27:23 +02002116 kfree(p);
Philipp Reisner148efa12011-01-15 00:21:15 +01002117 return -1;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002118 }
2119 }
2120 p->conn_flags = cpu_to_be32(cf);
2121
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122 if (mdev->agreed_pro_version >= 87)
2123 strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
2124
2125 rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002126 (struct p_header80 *)p, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002127 kfree(p);
2128 return rv;
2129}
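
/*
 * Illustrative sketch (not part of the original source): conn_flags is a
 * small bit field; CF_WANT_LOSE carries the local discard-my-data setting,
 * and CF_DRY_RUN may only be sent to peers speaking protocol 92 or newer.
 * The hypothetical helper below rebuilds the same value, leaving the
 * "--dry-run is not supported" error path to the real code above.
 */
static u32 __maybe_unused example_conn_flags(int want_lose, int dry_run,
					     int agreed_pro_version)
{
	u32 cf = 0;

	if (want_lose)
		cf |= CF_WANT_LOSE;
	if (dry_run && agreed_pro_version >= 92)
		cf |= CF_DRY_RUN;
	return cf;
}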
2130
2131int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
2132{
2133 struct p_uuids p;
2134 int i;
2135
2136 if (!get_ldev_if_state(mdev, D_NEGOTIATING))
2137 return 1;
2138
2139 for (i = UI_CURRENT; i < UI_SIZE; i++)
2140 p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
2141
2142 mdev->comm_bm_set = drbd_bm_total_weight(mdev);
2143 p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
2144 uuid_flags |= mdev->net_conf->want_lose ? 1 : 0;
2145 uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
2146 uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
2147 p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);
2148
2149 put_ldev(mdev);
2150
2151 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002152 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002153}
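
/*
 * Illustrative sketch (not part of the original source): uuid_flags as sent
 * above is a bit field built from three local conditions; bit 0 reflects
 * want_lose, bit 1 the CRASHED_PRIMARY flag (the same bit that is cleared
 * from p_uuid[UI_FLAGS] in after_state_ch()), and bit 2 an inconsistent disk
 * in new_state_tmp.  The hypothetical helper below rebuilds that value.
 */
static u64 __maybe_unused example_uuid_flags(int want_lose, int crashed_primary,
					     int disk_inconsistent)
{
	return (want_lose ? 1 : 0) |
	       (crashed_primary ? 2 : 0) |
	       (disk_inconsistent ? 4 : 0);
}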
2154
2155int drbd_send_uuids(struct drbd_conf *mdev)
2156{
2157 return _drbd_send_uuids(mdev, 0);
2158}
2159
2160int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
2161{
2162 return _drbd_send_uuids(mdev, 8);
2163}
2164
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002165void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
2166{
2167 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
2168 u64 *uuid = mdev->ldev->md.uuid;
2169 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
2170 text,
2171 (unsigned long long)uuid[UI_CURRENT],
2172 (unsigned long long)uuid[UI_BITMAP],
2173 (unsigned long long)uuid[UI_HISTORY_START],
2174 (unsigned long long)uuid[UI_HISTORY_END]);
2175 put_ldev(mdev);
2176 } else {
2177 dev_info(DEV, "%s effective data uuid: %016llX\n",
2178 text,
2179 (unsigned long long)mdev->ed_uuid);
2180 }
2181}
2182
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002183int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002184{
2185 struct p_rs_uuid p;
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002186 u64 uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002187
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002188 D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
2189
Philipp Reisner5ba3dac2011-10-05 15:54:18 +02002190 uuid = mdev->ldev->md.uuid[UI_BITMAP];
2191 if (uuid && uuid != UUID_JUST_CREATED)
2192 uuid = uuid + UUID_NEW_BM_OFFSET;
2193 else
2194 get_random_bytes(&uuid, sizeof(u64));
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002195 drbd_uuid_set(mdev, UI_BITMAP, uuid);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002196 drbd_print_uuids(mdev, "updated sync UUID");
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002197 drbd_md_sync(mdev);
2198 p.uuid = cpu_to_be64(uuid);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002199
2200 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002201 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002202}
2203
Philipp Reisnere89b5912010-03-24 17:11:33 +01002204int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205{
2206 struct p_sizes p;
2207 sector_t d_size, u_size;
Lars Ellenbergdb141b22012-06-25 19:15:58 +02002208 int q_order_type;
2209 unsigned int max_bio_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002210 int ok;
2211
2212 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
2213 D_ASSERT(mdev->ldev->backing_bdev);
2214 d_size = drbd_get_max_capacity(mdev->ldev);
2215 u_size = mdev->ldev->dc.disk_size;
2216 q_order_type = drbd_queue_order_type(mdev);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002217 max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
Lars Ellenbergdb141b22012-06-25 19:15:58 +02002218 max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002219 put_ldev(mdev);
2220 } else {
2221 d_size = 0;
2222 u_size = 0;
2223 q_order_type = QUEUE_ORDERED_NONE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02002224 max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002225 }
2226
Philipp Reisner68093842011-06-30 15:43:06 +02002227 /* Never allow old drbd (up to 8.3.7) to see more than 32KiB */
2228 if (mdev->agreed_pro_version <= 94)
Lars Ellenbergdb141b22012-06-25 19:15:58 +02002229 max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
Philipp Reisner68093842011-06-30 15:43:06 +02002230
Philipp Reisnerb411b362009-09-25 16:07:19 -07002231 p.d_size = cpu_to_be64(d_size);
2232 p.u_size = cpu_to_be64(u_size);
2233 p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
Philipp Reisner99432fc2011-05-20 16:39:13 +02002234 p.max_bio_size = cpu_to_be32(max_bio_size);
Philipp Reisnere89b5912010-03-24 17:11:33 +01002235 p.queue_order_type = cpu_to_be16(q_order_type);
2236 p.dds_flags = cpu_to_be16(flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002237
2238 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002239 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002240 return ok;
2241}
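
/*
 * Illustrative sketch (not part of the original source): the advertised
 * max_bio_size above is the local queue limit clamped to DRBD's own maximum
 * and, for peers up to protocol version 94, additionally to what an 8.3.7
 * style header can express.  The hypothetical helper below applies the same
 * two clamps to a plain value.
 */
static unsigned int __maybe_unused example_clamp_max_bio_size(unsigned int local_limit,
							      int agreed_pro_version)
{
	unsigned int max = local_limit;

	if (max > DRBD_MAX_BIO_SIZE)
		max = DRBD_MAX_BIO_SIZE;
	if (agreed_pro_version <= 94 && max > DRBD_MAX_SIZE_H80_PACKET)
		max = DRBD_MAX_SIZE_H80_PACKET;
	return max;
}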
2242
2243/**
Lars Ellenbergf479ea02011-10-27 16:52:30 +02002244 * drbd_send_current_state() - Sends the drbd state to the peer
Philipp Reisnerb411b362009-09-25 16:07:19 -07002245 * @mdev: DRBD device.
2246 */
Lars Ellenbergf479ea02011-10-27 16:52:30 +02002247int drbd_send_current_state(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248{
2249 struct socket *sock;
2250 struct p_state p;
2251 int ok = 0;
2252
 2253	/* Grab state lock so we won't send state if we're in the middle
2254 * of a cluster wide state change on another thread */
2255 drbd_state_lock(mdev);
2256
2257 mutex_lock(&mdev->data.mutex);
2258
2259 p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
2260 sock = mdev->data.socket;
2261
2262 if (likely(sock != NULL)) {
2263 ok = _drbd_send_cmd(mdev, sock, P_STATE,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002264 (struct p_header80 *)&p, sizeof(p), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002265 }
2266
2267 mutex_unlock(&mdev->data.mutex);
2268
2269 drbd_state_unlock(mdev);
2270 return ok;
2271}
2272
Lars Ellenbergf479ea02011-10-27 16:52:30 +02002273/**
2274 * drbd_send_state() - After a state change, sends the new state to the peer
2275 * @mdev: DRBD device.
2276 * @state: the state to send, not necessarily the current state.
2277 *
2278 * Each state change queues an "after_state_ch" work, which will eventually
2279 * send the resulting new state to the peer. If more state changes happen
2280 * between queuing and processing of the after_state_ch work, we still
2281 * want to send each intermediary state in the order it occurred.
2282 */
2283int drbd_send_state(struct drbd_conf *mdev, union drbd_state state)
2284{
2285 struct socket *sock;
2286 struct p_state p;
2287 int ok = 0;
2288
2289 mutex_lock(&mdev->data.mutex);
2290
2291 p.state = cpu_to_be32(state.i);
2292 sock = mdev->data.socket;
2293
2294 if (likely(sock != NULL)) {
2295 ok = _drbd_send_cmd(mdev, sock, P_STATE,
2296 (struct p_header80 *)&p, sizeof(p), 0);
2297 }
2298
2299 mutex_unlock(&mdev->data.mutex);
2300
2301 return ok;
2302}
2303
Philipp Reisnerb411b362009-09-25 16:07:19 -07002304int drbd_send_state_req(struct drbd_conf *mdev,
2305 union drbd_state mask, union drbd_state val)
2306{
2307 struct p_req_state p;
2308
2309 p.mask = cpu_to_be32(mask.i);
2310 p.val = cpu_to_be32(val.i);
2311
2312 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002313 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002314}
2315
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01002316int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002317{
2318 struct p_req_state_reply p;
2319
2320 p.retcode = cpu_to_be32(retcode);
2321
2322 return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002323 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002324}
2325
2326int fill_bitmap_rle_bits(struct drbd_conf *mdev,
2327 struct p_compressed_bm *p,
2328 struct bm_xfer_ctx *c)
2329{
2330 struct bitstream bs;
2331 unsigned long plain_bits;
2332 unsigned long tmp;
2333 unsigned long rl;
2334 unsigned len;
2335 unsigned toggle;
2336 int bits;
2337
2338 /* may we use this feature? */
2339 if ((mdev->sync_conf.use_rle == 0) ||
2340 (mdev->agreed_pro_version < 90))
2341 return 0;
2342
2343 if (c->bit_offset >= c->bm_bits)
2344 return 0; /* nothing to do. */
2345
2346 /* use at most thus many bytes */
2347 bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
2348 memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
2349 /* plain bits covered in this code string */
2350 plain_bits = 0;
2351
2352 /* p->encoding & 0x80 stores whether the first run length is set.
2353 * bit offset is implicit.
2354 * start with toggle == 2 to be able to tell the first iteration */
2355 toggle = 2;
2356
 2357	/* see how many plain bits we can stuff into one packet
2358 * using RLE and VLI. */
2359 do {
2360 tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
2361 : _drbd_bm_find_next(mdev, c->bit_offset);
2362 if (tmp == -1UL)
2363 tmp = c->bm_bits;
2364 rl = tmp - c->bit_offset;
2365
2366 if (toggle == 2) { /* first iteration */
2367 if (rl == 0) {
2368 /* the first checked bit was set,
2369 * store start value, */
2370 DCBP_set_start(p, 1);
2371 /* but skip encoding of zero run length */
2372 toggle = !toggle;
2373 continue;
2374 }
2375 DCBP_set_start(p, 0);
2376 }
2377
2378 /* paranoia: catch zero runlength.
2379 * can only happen if bitmap is modified while we scan it. */
2380 if (rl == 0) {
2381 dev_err(DEV, "unexpected zero runlength while encoding bitmap "
2382 "t:%u bo:%lu\n", toggle, c->bit_offset);
2383 return -1;
2384 }
2385
2386 bits = vli_encode_bits(&bs, rl);
2387 if (bits == -ENOBUFS) /* buffer full */
2388 break;
2389 if (bits <= 0) {
2390 dev_err(DEV, "error while encoding bitmap: %d\n", bits);
2391 return 0;
2392 }
2393
2394 toggle = !toggle;
2395 plain_bits += rl;
2396 c->bit_offset = tmp;
2397 } while (c->bit_offset < c->bm_bits);
2398
2399 len = bs.cur.b - p->code + !!bs.cur.bit;
2400
2401 if (plain_bits < (len << 3)) {
2402 /* incompressible with this method.
2403 * we need to rewind both word and bit position. */
2404 c->bit_offset -= plain_bits;
2405 bm_xfer_ctx_bit_to_word_offset(c);
2406 c->bit_offset = c->word_offset * BITS_PER_LONG;
2407 return 0;
2408 }
2409
2410 /* RLE + VLI was able to compress it just fine.
2411 * update c->word_offset. */
2412 bm_xfer_ctx_bit_to_word_offset(c);
2413
2414 /* store pad_bits */
2415 DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
2416
2417 return len;
2418}
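
/*
 * Illustrative sketch (not part of the original source): the encoder above
 * walks the bitmap as alternating runs and hands each run length to the VLI
 * encoder.  The hypothetical helper below extracts one run length from a
 * plain word array (bits numbered LSB first within each word), mirroring the
 * "tmp - c->bit_offset" step, under the simplifying assumption that the
 * whole bitmap is addressable as one array.
 */
static unsigned long __maybe_unused example_run_length(const unsigned long *bm,
		unsigned long nbits, unsigned long start, int next_bit_value)
{
	unsigned long i = start;

	/* advance until we hit the first bit equal to next_bit_value */
	while (i < nbits) {
		int bit = (bm[i / BITS_PER_LONG] >> (i % BITS_PER_LONG)) & 1;

		if (bit == next_bit_value)
			break;
		i++;
	}
	return i - start;	/* length of the run before the next toggle */
}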
2419
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002420/**
2421 * send_bitmap_rle_or_plain
2422 *
2423 * Return 0 when done, 1 when another iteration is needed, and a negative error
2424 * code upon failure.
2425 */
2426static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07002427send_bitmap_rle_or_plain(struct drbd_conf *mdev,
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002428 struct p_header80 *h, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002429{
2430 struct p_compressed_bm *p = (void*)h;
2431 unsigned long num_words;
2432 int len;
2433 int ok;
2434
2435 len = fill_bitmap_rle_bits(mdev, p, c);
2436
2437 if (len < 0)
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002438 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002439
2440 if (len) {
2441 DCBP_set_code(p, RLE_VLI_Bits);
2442 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h,
2443 sizeof(*p) + len, 0);
2444
2445 c->packets[0]++;
2446 c->bytes[0] += sizeof(*p) + len;
2447
2448 if (c->bit_offset >= c->bm_bits)
2449 len = 0; /* DONE */
2450 } else {
2451 /* was not compressible.
2452 * send a buffer full of plain text bits instead. */
2453 num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
2454 len = num_words * sizeof(long);
2455 if (len)
2456 drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
2457 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002458 h, sizeof(struct p_header80) + len, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002459 c->word_offset += num_words;
2460 c->bit_offset = c->word_offset * BITS_PER_LONG;
2461
2462 c->packets[1]++;
Philipp Reisner0b70a132010-08-20 13:36:10 +02002463 c->bytes[1] += sizeof(struct p_header80) + len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002464
2465 if (c->bit_offset > c->bm_bits)
2466 c->bit_offset = c->bm_bits;
2467 }
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002468 if (ok) {
2469 if (len == 0) {
2470 INFO_bm_xfer_stats(mdev, "send", c);
2471 return 0;
2472 } else
2473 return 1;
2474 }
2475 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002476}
2477
2478/* See the comment at receive_bitmap() */
2479int _drbd_send_bitmap(struct drbd_conf *mdev)
2480{
2481 struct bm_xfer_ctx c;
Philipp Reisner0b70a132010-08-20 13:36:10 +02002482 struct p_header80 *p;
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002483 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002484
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002485 ERR_IF(!mdev->bitmap) return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002486
2487 /* maybe we should use some per thread scratch page,
2488 * and allocate that during initial device creation? */
Philipp Reisner0b70a132010-08-20 13:36:10 +02002489 p = (struct p_header80 *) __get_free_page(GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002490 if (!p) {
2491 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002492 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002493 }
2494
2495 if (get_ldev(mdev)) {
2496 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
2497 dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
2498 drbd_bm_set_all(mdev);
2499 if (drbd_bm_write(mdev)) {
2500 /* write_bm did fail! Leave full sync flag set in Meta P_DATA
2501 * but otherwise process as per normal - need to tell other
2502 * side that a full resync is required! */
2503 dev_err(DEV, "Failed to write bitmap to disk!\n");
2504 } else {
2505 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
2506 drbd_md_sync(mdev);
2507 }
2508 }
2509 put_ldev(mdev);
2510 }
2511
2512 c = (struct bm_xfer_ctx) {
2513 .bm_bits = drbd_bm_bits(mdev),
2514 .bm_words = drbd_bm_words(mdev),
2515 };
2516
2517 do {
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002518 err = send_bitmap_rle_or_plain(mdev, p, &c);
2519 } while (err > 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002520
2521 free_page((unsigned long) p);
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002522 return err == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002523}
2524
2525int drbd_send_bitmap(struct drbd_conf *mdev)
2526{
2527 int err;
2528
2529 if (!drbd_get_data_sock(mdev))
2530 return -1;
2531 err = !_drbd_send_bitmap(mdev);
2532 drbd_put_data_sock(mdev);
2533 return err;
2534}
2535
2536int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
2537{
2538 int ok;
2539 struct p_barrier_ack p;
2540
2541 p.barrier = barrier_nr;
2542 p.set_size = cpu_to_be32(set_size);
2543
2544 if (mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002545 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002546 ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002547 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002548 return ok;
2549}
2550
2551/**
2552 * _drbd_send_ack() - Sends an ack packet
2553 * @mdev: DRBD device.
2554 * @cmd: Packet command code.
2555 * @sector: sector, needs to be in big endian byte order
2556 * @blksize: size in byte, needs to be in big endian byte order
2557 * @block_id: Id, big endian byte order
2558 */
2559static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
2560 u64 sector,
2561 u32 blksize,
2562 u64 block_id)
2563{
2564 int ok;
2565 struct p_block_ack p;
2566
2567 p.sector = sector;
2568 p.block_id = block_id;
2569 p.blksize = blksize;
2570 p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
2571
2572 if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002573 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002574 ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002575 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002576 return ok;
2577}
2578
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02002579/* dp->sector and dp->block_id already/still in network byte order,
2580 * data_size is payload size according to dp->head,
2581 * and may need to be corrected for digest size. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002582int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02002583 struct p_data *dp, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002584{
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02002585 data_size -= (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
2586 crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002587 return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
2588 dp->block_id);
2589}
2590
2591int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
2592 struct p_block_req *rp)
2593{
2594 return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
2595}
2596
2597/**
2598 * drbd_send_ack() - Sends an ack packet
2599 * @mdev: DRBD device.
2600 * @cmd: Packet command code.
2601 * @e: Epoch entry.
2602 */
2603int drbd_send_ack(struct drbd_conf *mdev,
2604 enum drbd_packets cmd, struct drbd_epoch_entry *e)
2605{
2606 return _drbd_send_ack(mdev, cmd,
2607 cpu_to_be64(e->sector),
2608 cpu_to_be32(e->size),
2609 e->block_id);
2610}
2611
2612/* This function misuses the block_id field to signal if the blocks
 2613	 * are in sync or not. */
2614int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
2615 sector_t sector, int blksize, u64 block_id)
2616{
2617 return _drbd_send_ack(mdev, cmd,
2618 cpu_to_be64(sector),
2619 cpu_to_be32(blksize),
2620 cpu_to_be64(block_id));
2621}
2622
2623int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
2624 sector_t sector, int size, u64 block_id)
2625{
2626 int ok;
2627 struct p_block_req p;
2628
2629 p.sector = cpu_to_be64(sector);
2630 p.block_id = block_id;
2631 p.blksize = cpu_to_be32(size);
2632
2633 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002634 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002635 return ok;
2636}
2637
2638int drbd_send_drequest_csum(struct drbd_conf *mdev,
2639 sector_t sector, int size,
2640 void *digest, int digest_size,
2641 enum drbd_packets cmd)
2642{
2643 int ok;
2644 struct p_block_req p;
2645
2646 p.sector = cpu_to_be64(sector);
2647 p.block_id = BE_DRBD_MAGIC + 0xbeef;
2648 p.blksize = cpu_to_be32(size);
2649
2650 p.head.magic = BE_DRBD_MAGIC;
2651 p.head.command = cpu_to_be16(cmd);
Philipp Reisner0b70a132010-08-20 13:36:10 +02002652 p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002653
2654 mutex_lock(&mdev->data.mutex);
2655
2656 ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0));
2657 ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0));
2658
2659 mutex_unlock(&mdev->data.mutex);
2660
2661 return ok;
2662}
2663
2664int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
2665{
2666 int ok;
2667 struct p_block_req p;
2668
2669 p.sector = cpu_to_be64(sector);
2670 p.block_id = BE_DRBD_MAGIC + 0xbabe;
2671 p.blksize = cpu_to_be32(size);
2672
2673 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002674 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002675 return ok;
2676}
2677
2678/* called on sndtimeo
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002679 * returns false if we should retry,
2680 * true if we think connection is dead
Philipp Reisnerb411b362009-09-25 16:07:19 -07002681 */
2682static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
2683{
2684 int drop_it;
2685 /* long elapsed = (long)(jiffies - mdev->last_received); */
2686
2687 drop_it = mdev->meta.socket == sock
2688 || !mdev->asender.task
2689 || get_t_state(&mdev->asender) != Running
2690 || mdev->state.conn < C_CONNECTED;
2691
2692 if (drop_it)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002693 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002694
2695 drop_it = !--mdev->ko_count;
2696 if (!drop_it) {
2697 dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
2698 current->comm, current->pid, mdev->ko_count);
2699 request_ping(mdev);
2700 }
2701
2702 return drop_it; /* && (mdev->state == R_PRIMARY) */;
2703}
2704
2705/* The idea of sendpage seems to be to put some kind of reference
2706 * to the page into the skb, and to hand it over to the NIC. In
2707 * this process get_page() gets called.
2708 *
2709 * As soon as the page was really sent over the network put_page()
2710 * gets called by some part of the network layer. [ NIC driver? ]
2711 *
2712 * [ get_page() / put_page() increment/decrement the count. If count
2713 * reaches 0 the page will be freed. ]
2714 *
2715 * This works nicely with pages from FSs.
2716 * But this means that in protocol A we might signal IO completion too early!
2717 *
2718 * In order not to corrupt data during a resync we must make sure
 2719	 * that we do not reuse our own buffer pages (EEs) too early, therefore
2720 * we have the net_ee list.
2721 *
 2722	 * XFS still seems to have problems: it submits pages with page_count == 0!
2723 * As a workaround, we disable sendpage on pages
2724 * with page_count == 0 or PageSlab.
2725 */
2726static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002727 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002728{
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002729 int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002730 kunmap(page);
2731 if (sent == size)
2732 mdev->send_cnt += size>>9;
2733 return sent == size;
2734}
2735
2736static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002737 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002738{
2739 mm_segment_t oldfs = get_fs();
2740 int sent, ok;
2741 int len = size;
2742
2743 /* e.g. XFS meta- & log-data is in slab pages, which have a
2744 * page_count of 0 and/or have PageSlab() set.
2745 * we cannot use send_page for those, as that does get_page();
2746 * put_page(); and would cause either a VM_BUG directly, or
2747 * __page_cache_release a page that would actually still be referenced
2748 * by someone, leading to some obscure delayed Oops somewhere else. */
2749 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002750 return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002751
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002752 msg_flags |= MSG_NOSIGNAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002753 drbd_update_congested(mdev);
2754 set_fs(KERNEL_DS);
2755 do {
2756 sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
2757 offset, len,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002758 msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002759 if (sent == -EAGAIN) {
2760 if (we_should_drop_the_connection(mdev,
2761 mdev->data.socket))
2762 break;
2763 else
2764 continue;
2765 }
2766 if (sent <= 0) {
2767 dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
2768 __func__, (int)size, len, sent);
2769 break;
2770 }
2771 len -= sent;
2772 offset += sent;
2773 } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
2774 set_fs(oldfs);
2775 clear_bit(NET_CONGESTED, &mdev->flags);
2776
2777 ok = (len == 0);
2778 if (likely(ok))
2779 mdev->send_cnt += size>>9;
2780 return ok;
2781}
2782
2783static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
2784{
2785 struct bio_vec *bvec;
2786 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002787 /* hint all but last page with MSG_MORE */
Lars Ellenberg001a8862012-03-08 16:43:45 +01002788 bio_for_each_segment(bvec, bio, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002789 if (!_drbd_no_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002790 bvec->bv_offset, bvec->bv_len,
2791 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002792 return 0;
2793 }
2794 return 1;
2795}
2796
2797static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
2798{
2799 struct bio_vec *bvec;
2800 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002801 /* hint all but last page with MSG_MORE */
Lars Ellenberg001a8862012-03-08 16:43:45 +01002802 bio_for_each_segment(bvec, bio, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002803 if (!_drbd_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002804 bvec->bv_offset, bvec->bv_len,
2805 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002806 return 0;
2807 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002808 return 1;
2809}
2810
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002811static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
2812{
2813 struct page *page = e->pages;
2814 unsigned len = e->size;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002815 /* hint all but last page with MSG_MORE */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002816 page_chain_for_each(page) {
2817 unsigned l = min_t(unsigned, len, PAGE_SIZE);
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002818 if (!_drbd_send_page(mdev, page, 0, l,
2819 page_chain_next(page) ? MSG_MORE : 0))
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002820 return 0;
2821 len -= l;
2822 }
2823 return 1;
2824}
2825
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002826static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
2827{
2828 if (mdev->agreed_pro_version >= 95)
2829 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002830 (bi_rw & REQ_FUA ? DP_FUA : 0) |
2831 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
2832 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
2833 else
Jens Axboe721a9602011-03-09 11:56:30 +01002834 return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002835}
2836
Philipp Reisnerb411b362009-09-25 16:07:19 -07002837/* Used to send write requests
2838 * R_PRIMARY -> Peer (P_DATA)
2839 */
2840int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2841{
2842 int ok = 1;
2843 struct p_data p;
2844 unsigned int dp_flags = 0;
2845 void *dgb;
2846 int dgs;
2847
2848 if (!drbd_get_data_sock(mdev))
2849 return 0;
2850
2851 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2852 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2853
Philipp Reisnerd5373382010-08-23 15:18:33 +02002854 if (req->size <= DRBD_MAX_SIZE_H80_PACKET) {
Philipp Reisner0b70a132010-08-20 13:36:10 +02002855 p.head.h80.magic = BE_DRBD_MAGIC;
2856 p.head.h80.command = cpu_to_be16(P_DATA);
2857 p.head.h80.length =
2858 cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2859 } else {
2860 p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2861 p.head.h95.command = cpu_to_be16(P_DATA);
2862 p.head.h95.length =
2863 cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2864 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865
2866 p.sector = cpu_to_be64(req->sector);
2867 p.block_id = (unsigned long)req;
Lars Ellenberg671a74e2012-03-08 11:45:57 +01002868 p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002869
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002870 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
2871
Philipp Reisnerb411b362009-09-25 16:07:19 -07002872 if (mdev->state.conn >= C_SYNC_SOURCE &&
2873 mdev->state.conn <= C_PAUSED_SYNC_T)
2874 dp_flags |= DP_MAY_SET_IN_SYNC;
2875
2876 p.dp_flags = cpu_to_be32(dp_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002877 set_bit(UNPLUG_REMOTE, &mdev->flags);
2878 ok = (sizeof(p) ==
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002879 drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002880 if (ok && dgs) {
2881 dgb = mdev->int_dig_out;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002882 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
Andreas Gruenbachercab2f742010-12-09 16:08:46 +01002883 ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884 }
2885 if (ok) {
Lars Ellenberg470be442010-11-10 10:36:52 +01002886		/* For protocol A, we have to memcpy the payload into
 2887		 * socket buffers, as the request may complete right away
 2888		 * as soon as we have handed it over to TCP, at which point
 2889		 * the data pages may become invalid.
 2890		 *
 2891		 * With data integrity enabled, we copy it as well, so we can be
 2892		 * sure that even if the bio pages are still being modified, the
 2893		 * data on the wire will not change; thus if the digest checks
 2894		 * out ok after sending on this side, but does not match on the
 2895		 * receiving side, we have certainly detected corruption elsewhere.
 2896		 */
2897 if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002898 ok = _drbd_send_bio(mdev, req->master_bio);
2899 else
2900 ok = _drbd_send_zc_bio(mdev, req->master_bio);
Lars Ellenberg470be442010-11-10 10:36:52 +01002901
2902 /* double check digest, sometimes buffers have been modified in flight. */
2903 if (dgs > 0 && dgs <= 64) {
Bart Van Assche24c48302011-05-21 18:32:29 +02002904 /* 64 byte, 512 bit, is the largest digest size
Lars Ellenberg470be442010-11-10 10:36:52 +01002905 * currently supported in kernel crypto. */
2906 unsigned char digest[64];
2907 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
2908 if (memcmp(mdev->int_dig_out, digest, dgs)) {
2909 dev_warn(DEV,
2910 "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
2911 (unsigned long long)req->sector, req->size);
2912 }
2913 } /* else if (dgs > 64) {
2914 ... Be noisy about digest too large ...
2915 } */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002916 }
2917
2918 drbd_put_data_sock(mdev);
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02002919
Philipp Reisnerb411b362009-09-25 16:07:19 -07002920 return ok;
2921}
2922
2923/* answer packet, used to send data back for read requests:
2924 * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
2925 * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
2926 */
2927int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2928 struct drbd_epoch_entry *e)
2929{
2930 int ok;
2931 struct p_data p;
2932 void *dgb;
2933 int dgs;
2934
2935 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2936 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2937
Philipp Reisnerd5373382010-08-23 15:18:33 +02002938 if (e->size <= DRBD_MAX_SIZE_H80_PACKET) {
Philipp Reisner0b70a132010-08-20 13:36:10 +02002939 p.head.h80.magic = BE_DRBD_MAGIC;
2940 p.head.h80.command = cpu_to_be16(cmd);
2941 p.head.h80.length =
2942 cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2943 } else {
2944 p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2945 p.head.h95.command = cpu_to_be16(cmd);
2946 p.head.h95.length =
2947 cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2948 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002949
2950 p.sector = cpu_to_be64(e->sector);
2951 p.block_id = e->block_id;
2952 /* p.seq_num = 0; No sequence numbers here.. */
2953
2954 /* Only called by our kernel thread.
2955 * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
2956 * in response to admin command or module unload.
2957 */
2958 if (!drbd_get_data_sock(mdev))
2959 return 0;
2960
Philipp Reisner0b70a132010-08-20 13:36:10 +02002961 ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002962 if (ok && dgs) {
2963 dgb = mdev->int_dig_out;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002964 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
Andreas Gruenbachercab2f742010-12-09 16:08:46 +01002965 ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002966 }
2967 if (ok)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002968 ok = _drbd_send_zc_ee(mdev, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002969
2970 drbd_put_data_sock(mdev);
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02002971
Philipp Reisnerb411b362009-09-25 16:07:19 -07002972 return ok;
2973}
2974
Philipp Reisner73a01a12010-10-27 14:33:00 +02002975int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
2976{
2977 struct p_block_desc p;
2978
2979 p.sector = cpu_to_be64(req->sector);
2980 p.blksize = cpu_to_be32(req->size);
2981
2982 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
2983}
2984
Philipp Reisnerb411b362009-09-25 16:07:19 -07002985/*
2986 drbd_send distinguishes two cases:
2987
2988 Packets sent via the data socket "sock"
2989 and packets sent via the meta data socket "msock"
2990
 2991                    sock                      msock
 2992  -----------------+-------------------------+------------------------------
 2993  timeout           conf.timeout / 2          conf.timeout / 2
 2994  timeout action    send a ping via msock     Abort communication
 2995                                              and close all sockets
2996*/
2997
2998/*
 2999 * you must hold the appropriate [m]sock mutex (data.mutex / meta.mutex) elsewhere!
3000 */
3001int drbd_send(struct drbd_conf *mdev, struct socket *sock,
3002 void *buf, size_t size, unsigned msg_flags)
3003{
3004 struct kvec iov;
3005 struct msghdr msg;
3006 int rv, sent = 0;
3007
3008 if (!sock)
3009 return -1000;
3010
3011 /* THINK if (signal_pending) return ... ? */
3012
3013 iov.iov_base = buf;
3014 iov.iov_len = size;
3015
3016 msg.msg_name = NULL;
3017 msg.msg_namelen = 0;
3018 msg.msg_control = NULL;
3019 msg.msg_controllen = 0;
3020 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
3021
3022 if (sock == mdev->data.socket) {
3023 mdev->ko_count = mdev->net_conf->ko_count;
3024 drbd_update_congested(mdev);
3025 }
3026 do {
3027 /* STRANGE
3028 * tcp_sendmsg does _not_ use its size parameter at all ?
3029 *
3030 * -EAGAIN on timeout, -EINTR on signal.
3031 */
3032/* THINK
3033 * do we need to block DRBD_SIG if sock == &meta.socket ??
3034 * otherwise wake_asender() might interrupt some send_*Ack !
3035 */
3036 rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
3037 if (rv == -EAGAIN) {
3038 if (we_should_drop_the_connection(mdev, sock))
3039 break;
3040 else
3041 continue;
3042 }
3043 D_ASSERT(rv != 0);
3044 if (rv == -EINTR) {
3045 flush_signals(current);
3046 rv = 0;
3047 }
3048 if (rv < 0)
3049 break;
3050 sent += rv;
3051 iov.iov_base += rv;
3052 iov.iov_len -= rv;
3053 } while (sent < size);
3054
3055 if (sock == mdev->data.socket)
3056 clear_bit(NET_CONGESTED, &mdev->flags);
3057
3058 if (rv <= 0) {
3059 if (rv != -EAGAIN) {
3060 dev_err(DEV, "%s_sendmsg returned %d\n",
3061 sock == mdev->meta.socket ? "msock" : "sock",
3062 rv);
3063 drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
3064 } else
3065 drbd_force_state(mdev, NS(conn, C_TIMEOUT));
3066 }
3067
3068 return sent;
3069}
3070
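/*
 * Usage sketch (illustration only; the helper name is made up): the callers
 * above, e.g. drbd_send_dblock(), push a header with MSG_MORE so the payload
 * that follows can be merged into the same TCP segment.  Condensed into one
 * helper, the pattern looks like this:
 */
static int __maybe_unused drbd_send_hdr_and_payload_sketch(struct drbd_conf *mdev,
		void *hdr, int hdr_len, void *payload, int len)
{
	int ok;

	if (!drbd_get_data_sock(mdev))
		return 0;
	/* hint the payload that follows with MSG_MORE */
	ok = drbd_send(mdev, mdev->data.socket, hdr, hdr_len,
		       len ? MSG_MORE : 0) == hdr_len;
	if (ok && len)
		ok = drbd_send(mdev, mdev->data.socket, payload, len, 0) == len;
	drbd_put_data_sock(mdev);
	return ok;
}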
3071static int drbd_open(struct block_device *bdev, fmode_t mode)
3072{
3073 struct drbd_conf *mdev = bdev->bd_disk->private_data;
3074 unsigned long flags;
3075 int rv = 0;
3076
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003077 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003078 spin_lock_irqsave(&mdev->req_lock, flags);
3079 /* to have a stable mdev->state.role
3080 * and no race with updating open_cnt */
3081
3082 if (mdev->state.role != R_PRIMARY) {
3083 if (mode & FMODE_WRITE)
3084 rv = -EROFS;
3085 else if (!allow_oos)
3086 rv = -EMEDIUMTYPE;
3087 }
3088
3089 if (!rv)
3090 mdev->open_cnt++;
3091 spin_unlock_irqrestore(&mdev->req_lock, flags);
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003092 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003093
3094 return rv;
3095}
3096
3097static int drbd_release(struct gendisk *gd, fmode_t mode)
3098{
3099 struct drbd_conf *mdev = gd->private_data;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003100 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101 mdev->open_cnt--;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003102 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003103 return 0;
3104}
3105
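/*
 * Sketch (assumption, not quoted from this file): drbd_open() and
 * drbd_release() above are hooked into the block layer through a
 * block_device_operations table; the driver's own drbd_ops plays this role
 * and is assigned to disk->fops in drbd_new_device() below.  A minimal
 * table of that shape would look like this:
 */
static const struct block_device_operations drbd_ops_sketch __maybe_unused = {
	.owner   = THIS_MODULE,
	.open    = drbd_open,
	.release = drbd_release,
};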
Philipp Reisnerb411b362009-09-25 16:07:19 -07003106static void drbd_set_defaults(struct drbd_conf *mdev)
3107{
Philipp Reisner85f4cc12010-06-29 17:35:34 +02003108 /* This way we get a compile error when sync_conf grows,
3109 and we forgot to initialize it here */
3110 mdev->sync_conf = (struct syncer_conf) {
3111 /* .rate = */ DRBD_RATE_DEF,
3112 /* .after = */ DRBD_AFTER_DEF,
3113 /* .al_extents = */ DRBD_AL_EXTENTS_DEF,
Philipp Reisner85f4cc12010-06-29 17:35:34 +02003114 /* .verify_alg = */ {}, 0,
3115 /* .cpu_mask = */ {}, 0,
3116 /* .csums_alg = */ {}, 0,
Philipp Reisnere7564142010-06-29 17:35:34 +02003117 /* .use_rle = */ 0,
Philipp Reisner9a31d712010-07-05 13:42:03 +02003118 /* .on_no_data = */ DRBD_ON_NO_DATA_DEF,
3119 /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF,
3120 /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF,
3121 /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF,
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02003122 /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF,
3123 /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF
Philipp Reisner85f4cc12010-06-29 17:35:34 +02003124 };
3125
 3126 /* Have to do it this way, because the layout differs between
3127 big endian and little endian */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003128 mdev->state = (union drbd_state) {
3129 { .role = R_SECONDARY,
3130 .peer = R_UNKNOWN,
3131 .conn = C_STANDALONE,
3132 .disk = D_DISKLESS,
3133 .pdsk = D_UNKNOWN,
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003134 .susp = 0,
3135 .susp_nod = 0,
3136 .susp_fen = 0
Philipp Reisnerb411b362009-09-25 16:07:19 -07003137 } };
3138}
3139
3140void drbd_init_set_defaults(struct drbd_conf *mdev)
3141{
3142 /* the memset(,0,) did most of this.
3143 * note: only assignments, no allocation in here */
3144
3145 drbd_set_defaults(mdev);
3146
Philipp Reisnerb411b362009-09-25 16:07:19 -07003147 atomic_set(&mdev->ap_bio_cnt, 0);
3148 atomic_set(&mdev->ap_pending_cnt, 0);
3149 atomic_set(&mdev->rs_pending_cnt, 0);
3150 atomic_set(&mdev->unacked_cnt, 0);
3151 atomic_set(&mdev->local_cnt, 0);
3152 atomic_set(&mdev->net_cnt, 0);
3153 atomic_set(&mdev->packet_seq, 0);
3154 atomic_set(&mdev->pp_in_use, 0);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003155 atomic_set(&mdev->pp_in_use_by_net, 0);
Philipp Reisner778f2712010-07-06 11:14:00 +02003156 atomic_set(&mdev->rs_sect_in, 0);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02003157 atomic_set(&mdev->rs_sect_ev, 0);
Philipp Reisner759fbdf2010-10-26 16:02:27 +02003158 atomic_set(&mdev->ap_in_flight, 0);
Philipp Reisnere1711732011-06-27 11:51:46 +02003159 atomic_set(&mdev->md_io_in_use, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003160
Philipp Reisnerb411b362009-09-25 16:07:19 -07003161 mutex_init(&mdev->data.mutex);
3162 mutex_init(&mdev->meta.mutex);
3163 sema_init(&mdev->data.work.s, 0);
3164 sema_init(&mdev->meta.work.s, 0);
3165 mutex_init(&mdev->state_mutex);
3166
3167 spin_lock_init(&mdev->data.work.q_lock);
3168 spin_lock_init(&mdev->meta.work.q_lock);
3169
3170 spin_lock_init(&mdev->al_lock);
3171 spin_lock_init(&mdev->req_lock);
3172 spin_lock_init(&mdev->peer_seq_lock);
3173 spin_lock_init(&mdev->epoch_lock);
3174
3175 INIT_LIST_HEAD(&mdev->active_ee);
3176 INIT_LIST_HEAD(&mdev->sync_ee);
3177 INIT_LIST_HEAD(&mdev->done_ee);
3178 INIT_LIST_HEAD(&mdev->read_ee);
3179 INIT_LIST_HEAD(&mdev->net_ee);
3180 INIT_LIST_HEAD(&mdev->resync_reads);
3181 INIT_LIST_HEAD(&mdev->data.work.q);
3182 INIT_LIST_HEAD(&mdev->meta.work.q);
3183 INIT_LIST_HEAD(&mdev->resync_work.list);
3184 INIT_LIST_HEAD(&mdev->unplug_work.list);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003185 INIT_LIST_HEAD(&mdev->go_diskless.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003186 INIT_LIST_HEAD(&mdev->md_sync_work.list);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003187 INIT_LIST_HEAD(&mdev->start_resync_work.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003188 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
Philipp Reisner0ced55a2010-04-30 15:26:20 +02003189
Philipp Reisner794abb72010-12-27 11:51:23 +01003190 mdev->resync_work.cb = w_resync_timer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003191 mdev->unplug_work.cb = w_send_write_hint;
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003192 mdev->go_diskless.cb = w_go_diskless;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003193 mdev->md_sync_work.cb = w_md_sync;
3194 mdev->bm_io_work.w.cb = w_bitmap_io;
Philipp Reisner370a43e2011-01-14 16:03:11 +01003195 mdev->start_resync_work.cb = w_start_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003196 init_timer(&mdev->resync_timer);
3197 init_timer(&mdev->md_sync_timer);
Philipp Reisner370a43e2011-01-14 16:03:11 +01003198 init_timer(&mdev->start_resync_timer);
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003199 init_timer(&mdev->request_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003200 mdev->resync_timer.function = resync_timer_fn;
3201 mdev->resync_timer.data = (unsigned long) mdev;
3202 mdev->md_sync_timer.function = md_sync_timer_fn;
3203 mdev->md_sync_timer.data = (unsigned long) mdev;
Philipp Reisner370a43e2011-01-14 16:03:11 +01003204 mdev->start_resync_timer.function = start_resync_timer_fn;
3205 mdev->start_resync_timer.data = (unsigned long) mdev;
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003206 mdev->request_timer.function = request_timer_fn;
3207 mdev->request_timer.data = (unsigned long) mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003208
3209 init_waitqueue_head(&mdev->misc_wait);
3210 init_waitqueue_head(&mdev->state_wait);
Philipp Reisner84dfb9f2010-06-23 11:20:05 +02003211 init_waitqueue_head(&mdev->net_cnt_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212 init_waitqueue_head(&mdev->ee_wait);
3213 init_waitqueue_head(&mdev->al_wait);
3214 init_waitqueue_head(&mdev->seq_wait);
3215
3216 drbd_thread_init(mdev, &mdev->receiver, drbdd_init);
3217 drbd_thread_init(mdev, &mdev->worker, drbd_worker);
3218 drbd_thread_init(mdev, &mdev->asender, drbd_asender);
3219
3220 mdev->agreed_pro_version = PRO_VERSION_MAX;
Philipp Reisner2451fc32010-08-24 13:43:11 +02003221 mdev->write_ordering = WO_bdev_flush;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003222 mdev->resync_wenr = LC_FREE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02003223 mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
3224 mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003225}
3226
3227void drbd_mdev_cleanup(struct drbd_conf *mdev)
3228{
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02003229 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230 if (mdev->receiver.t_state != None)
3231 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
3232 mdev->receiver.t_state);
3233
3234 /* no need to lock it, I'm the only thread alive */
3235 if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
3236 dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
3237 mdev->al_writ_cnt =
3238 mdev->bm_writ_cnt =
3239 mdev->read_cnt =
3240 mdev->recv_cnt =
3241 mdev->send_cnt =
3242 mdev->writ_cnt =
3243 mdev->p_size =
3244 mdev->rs_start =
3245 mdev->rs_total =
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02003246 mdev->rs_failed = 0;
3247 mdev->rs_last_events = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02003248 mdev->rs_last_sect_ev = 0;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02003249 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
3250 mdev->rs_mark_left[i] = 0;
3251 mdev->rs_mark_time[i] = 0;
3252 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003253 D_ASSERT(mdev->net_conf == NULL);
3254
3255 drbd_set_my_capacity(mdev, 0);
3256 if (mdev->bitmap) {
3257 /* maybe never allocated. */
Philipp Reisner02d9a942010-03-24 16:23:03 +01003258 drbd_bm_resize(mdev, 0, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003259 drbd_bm_cleanup(mdev);
3260 }
3261
3262 drbd_free_resources(mdev);
Philipp Reisner07782862010-08-31 12:00:50 +02003263 clear_bit(AL_SUSPENDED, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003264
3265 /*
 3266 * currently we call drbd_init_ee only on module load, so
 3267 * we may call drbd_release_ee only on module unload!
3268 */
3269 D_ASSERT(list_empty(&mdev->active_ee));
3270 D_ASSERT(list_empty(&mdev->sync_ee));
3271 D_ASSERT(list_empty(&mdev->done_ee));
3272 D_ASSERT(list_empty(&mdev->read_ee));
3273 D_ASSERT(list_empty(&mdev->net_ee));
3274 D_ASSERT(list_empty(&mdev->resync_reads));
3275 D_ASSERT(list_empty(&mdev->data.work.q));
3276 D_ASSERT(list_empty(&mdev->meta.work.q));
3277 D_ASSERT(list_empty(&mdev->resync_work.list));
3278 D_ASSERT(list_empty(&mdev->unplug_work.list));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003279 D_ASSERT(list_empty(&mdev->go_diskless.list));
Lars Ellenberg2265b472010-12-16 15:41:26 +01003280
3281 drbd_set_defaults(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003282}
3283
3284
3285static void drbd_destroy_mempools(void)
3286{
3287 struct page *page;
3288
3289 while (drbd_pp_pool) {
3290 page = drbd_pp_pool;
3291 drbd_pp_pool = (struct page *)page_private(page);
3292 __free_page(page);
3293 drbd_pp_vacant--;
3294 }
3295
3296 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
3297
Lars Ellenberg9476f392011-02-23 17:02:01 +01003298 if (drbd_md_io_bio_set)
3299 bioset_free(drbd_md_io_bio_set);
Lars Ellenberg42818082011-02-23 12:39:46 +01003300 if (drbd_md_io_page_pool)
3301 mempool_destroy(drbd_md_io_page_pool);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003302 if (drbd_ee_mempool)
3303 mempool_destroy(drbd_ee_mempool);
3304 if (drbd_request_mempool)
3305 mempool_destroy(drbd_request_mempool);
3306 if (drbd_ee_cache)
3307 kmem_cache_destroy(drbd_ee_cache);
3308 if (drbd_request_cache)
3309 kmem_cache_destroy(drbd_request_cache);
3310 if (drbd_bm_ext_cache)
3311 kmem_cache_destroy(drbd_bm_ext_cache);
3312 if (drbd_al_ext_cache)
3313 kmem_cache_destroy(drbd_al_ext_cache);
3314
Lars Ellenberg9476f392011-02-23 17:02:01 +01003315 drbd_md_io_bio_set = NULL;
Lars Ellenberg42818082011-02-23 12:39:46 +01003316 drbd_md_io_page_pool = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003317 drbd_ee_mempool = NULL;
3318 drbd_request_mempool = NULL;
3319 drbd_ee_cache = NULL;
3320 drbd_request_cache = NULL;
3321 drbd_bm_ext_cache = NULL;
3322 drbd_al_ext_cache = NULL;
3323
3324 return;
3325}
3326
3327static int drbd_create_mempools(void)
3328{
3329 struct page *page;
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01003330 const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003331 int i;
3332
3333 /* prepare our caches and mempools */
3334 drbd_request_mempool = NULL;
3335 drbd_ee_cache = NULL;
3336 drbd_request_cache = NULL;
3337 drbd_bm_ext_cache = NULL;
3338 drbd_al_ext_cache = NULL;
3339 drbd_pp_pool = NULL;
Lars Ellenberg42818082011-02-23 12:39:46 +01003340 drbd_md_io_page_pool = NULL;
Lars Ellenberg9476f392011-02-23 17:02:01 +01003341 drbd_md_io_bio_set = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342
3343 /* caches */
3344 drbd_request_cache = kmem_cache_create(
3345 "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
3346 if (drbd_request_cache == NULL)
3347 goto Enomem;
3348
3349 drbd_ee_cache = kmem_cache_create(
3350 "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL);
3351 if (drbd_ee_cache == NULL)
3352 goto Enomem;
3353
3354 drbd_bm_ext_cache = kmem_cache_create(
3355 "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
3356 if (drbd_bm_ext_cache == NULL)
3357 goto Enomem;
3358
3359 drbd_al_ext_cache = kmem_cache_create(
3360 "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
3361 if (drbd_al_ext_cache == NULL)
3362 goto Enomem;
3363
3364 /* mempools */
Lars Ellenberg9476f392011-02-23 17:02:01 +01003365#ifdef COMPAT_HAVE_BIOSET_CREATE
3366 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
3367 if (drbd_md_io_bio_set == NULL)
3368 goto Enomem;
3369#endif
3370
Lars Ellenberg42818082011-02-23 12:39:46 +01003371 drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
3372 if (drbd_md_io_page_pool == NULL)
3373 goto Enomem;
3374
Philipp Reisnerb411b362009-09-25 16:07:19 -07003375 drbd_request_mempool = mempool_create(number,
3376 mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
3377 if (drbd_request_mempool == NULL)
3378 goto Enomem;
3379
3380 drbd_ee_mempool = mempool_create(number,
3381 mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
Nicolas Kaiser2027ae12010-10-28 06:15:26 -06003382 if (drbd_ee_mempool == NULL)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003383 goto Enomem;
3384
3385 /* drbd's page pool */
3386 spin_lock_init(&drbd_pp_lock);
3387
3388 for (i = 0; i < number; i++) {
3389 page = alloc_page(GFP_HIGHUSER);
3390 if (!page)
3391 goto Enomem;
3392 set_page_private(page, (unsigned long)drbd_pp_pool);
3393 drbd_pp_pool = page;
3394 }
3395 drbd_pp_vacant = number;
3396
3397 return 0;
3398
3399Enomem:
3400 drbd_destroy_mempools(); /* in case we allocated some */
3401 return -ENOMEM;
3402}
3403
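/*
 * Usage sketch (illustration only, not how drbd_req.c is actually written):
 * objects backed by the pools created above are obtained and returned with
 * the standard mempool API, e.g. for struct drbd_request.  GFP_NOIO is the
 * natural choice since allocation may happen on the writeout path.
 */
static struct drbd_request * __maybe_unused drbd_req_alloc_sketch(void)
{
	return mempool_alloc(drbd_request_mempool, GFP_NOIO);
}

static void __maybe_unused drbd_req_free_sketch(struct drbd_request *req)
{
	mempool_free(req, drbd_request_mempool);
}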
3404static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
3405 void *unused)
3406{
3407 /* just so we have it. you never know what interesting things we
3408 * might want to do here some day...
3409 */
3410
3411 return NOTIFY_DONE;
3412}
3413
3414static struct notifier_block drbd_notifier = {
3415 .notifier_call = drbd_notify_sys,
3416};
3417
3418static void drbd_release_ee_lists(struct drbd_conf *mdev)
3419{
3420 int rr;
3421
3422 rr = drbd_release_ee(mdev, &mdev->active_ee);
3423 if (rr)
3424 dev_err(DEV, "%d EEs in active list found!\n", rr);
3425
3426 rr = drbd_release_ee(mdev, &mdev->sync_ee);
3427 if (rr)
3428 dev_err(DEV, "%d EEs in sync list found!\n", rr);
3429
3430 rr = drbd_release_ee(mdev, &mdev->read_ee);
3431 if (rr)
3432 dev_err(DEV, "%d EEs in read list found!\n", rr);
3433
3434 rr = drbd_release_ee(mdev, &mdev->done_ee);
3435 if (rr)
3436 dev_err(DEV, "%d EEs in done list found!\n", rr);
3437
3438 rr = drbd_release_ee(mdev, &mdev->net_ee);
3439 if (rr)
3440 dev_err(DEV, "%d EEs in net list found!\n", rr);
3441}
3442
3443/* caution. no locking.
3444 * currently only used from module cleanup code. */
3445static void drbd_delete_device(unsigned int minor)
3446{
3447 struct drbd_conf *mdev = minor_to_mdev(minor);
3448
3449 if (!mdev)
3450 return;
3451
Philipp Reisnerdfa8bed2011-06-29 14:06:08 +02003452 del_timer_sync(&mdev->request_timer);
3453
Philipp Reisnerb411b362009-09-25 16:07:19 -07003454 /* paranoia asserts */
3455 if (mdev->open_cnt != 0)
3456 dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
3457 __FILE__ , __LINE__);
3458
3459 ERR_IF (!list_empty(&mdev->data.work.q)) {
3460 struct list_head *lp;
3461 list_for_each(lp, &mdev->data.work.q) {
3462 dev_err(DEV, "lp = %p\n", lp);
3463 }
3464 };
3465 /* end paranoia asserts */
3466
3467 del_gendisk(mdev->vdisk);
3468
3469 /* cleanup stuff that may have been allocated during
3470 * device (re-)configuration or state changes */
3471
3472 if (mdev->this_bdev)
3473 bdput(mdev->this_bdev);
3474
3475 drbd_free_resources(mdev);
3476
3477 drbd_release_ee_lists(mdev);
3478
Bart Van Assche24c48302011-05-21 18:32:29 +02003479 /* should be freed on disconnect? */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003480 kfree(mdev->ee_hash);
3481 /*
3482 mdev->ee_hash_s = 0;
3483 mdev->ee_hash = NULL;
3484 */
3485
3486 lc_destroy(mdev->act_log);
3487 lc_destroy(mdev->resync);
3488
3489 kfree(mdev->p_uuid);
3490 /* mdev->p_uuid = NULL; */
3491
3492 kfree(mdev->int_dig_out);
3493 kfree(mdev->int_dig_in);
3494 kfree(mdev->int_dig_vv);
3495
3496 /* cleanup the rest that has been
3497 * allocated from drbd_new_device
3498 * and actually free the mdev itself */
3499 drbd_free_mdev(mdev);
3500}
3501
3502static void drbd_cleanup(void)
3503{
3504 unsigned int i;
3505
3506 unregister_reboot_notifier(&drbd_notifier);
3507
Lars Ellenberg17a93f32010-11-24 10:37:35 +01003508 /* first remove proc,
 3509 * drbdsetup uses its presence to detect
3510 * whether DRBD is loaded.
 3511 * If we got stuck in proc removal,
3512 * but have netlink already deregistered,
3513 * some drbdsetup commands may wait forever
3514 * for an answer.
3515 */
3516 if (drbd_proc)
3517 remove_proc_entry("drbd", NULL);
3518
Philipp Reisnerb411b362009-09-25 16:07:19 -07003519 drbd_nl_cleanup();
3520
3521 if (minor_table) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003522 i = minor_count;
3523 while (i--)
3524 drbd_delete_device(i);
3525 drbd_destroy_mempools();
3526 }
3527
3528 kfree(minor_table);
3529
3530 unregister_blkdev(DRBD_MAJOR, "drbd");
3531
3532 printk(KERN_INFO "drbd: module cleanup done.\n");
3533}
3534
3535/**
3536 * drbd_congested() - Callback for pdflush
3537 * @congested_data: User data
3538 * @bdi_bits: Bits pdflush is currently interested in
3539 *
3540 * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
3541 */
3542static int drbd_congested(void *congested_data, int bdi_bits)
3543{
3544 struct drbd_conf *mdev = congested_data;
3545 struct request_queue *q;
3546 char reason = '-';
3547 int r = 0;
3548
Andreas Gruenbacher1b881ef2010-12-13 18:03:38 +01003549 if (!may_inc_ap_bio(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003550 /* DRBD has frozen IO */
3551 r = bdi_bits;
3552 reason = 'd';
3553 goto out;
3554 }
3555
Lars Ellenbergc2ba6862012-06-14 15:14:06 +02003556 if (test_bit(CALLBACK_PENDING, &mdev->flags)) {
3557 r |= (1 << BDI_async_congested);
3558 /* Without good local data, we would need to read from remote,
3559 * and that would need the worker thread as well, which is
3560 * currently blocked waiting for that usermode helper to
3561 * finish.
3562 */
3563 if (!get_ldev_if_state(mdev, D_UP_TO_DATE))
3564 r |= (1 << BDI_sync_congested);
3565 else
3566 put_ldev(mdev);
3567 r &= bdi_bits;
3568 reason = 'c';
3569 goto out;
3570 }
3571
Philipp Reisnerb411b362009-09-25 16:07:19 -07003572 if (get_ldev(mdev)) {
3573 q = bdev_get_queue(mdev->ldev->backing_bdev);
3574 r = bdi_congested(&q->backing_dev_info, bdi_bits);
3575 put_ldev(mdev);
3576 if (r)
3577 reason = 'b';
3578 }
3579
3580 if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) {
3581 r |= (1 << BDI_async_congested);
3582 reason = reason == 'b' ? 'a' : 'n';
3583 }
3584
3585out:
3586 mdev->congestion_reason = reason;
3587 return r;
3588}
3589
3590struct drbd_conf *drbd_new_device(unsigned int minor)
3591{
3592 struct drbd_conf *mdev;
3593 struct gendisk *disk;
3594 struct request_queue *q;
3595
3596 /* GFP_KERNEL, we are outside of all write-out paths */
3597 mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
3598 if (!mdev)
3599 return NULL;
3600 if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL))
3601 goto out_no_cpumask;
3602
3603 mdev->minor = minor;
3604
3605 drbd_init_set_defaults(mdev);
3606
3607 q = blk_alloc_queue(GFP_KERNEL);
3608 if (!q)
3609 goto out_no_q;
3610 mdev->rq_queue = q;
3611 q->queuedata = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003612
3613 disk = alloc_disk(1);
3614 if (!disk)
3615 goto out_no_disk;
3616 mdev->vdisk = disk;
3617
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003618 set_disk_ro(disk, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003619
3620 disk->queue = q;
3621 disk->major = DRBD_MAJOR;
3622 disk->first_minor = minor;
3623 disk->fops = &drbd_ops;
3624 sprintf(disk->disk_name, "drbd%d", minor);
3625 disk->private_data = mdev;
3626
3627 mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
3628 /* we have no partitions. we contain only ourselves. */
3629 mdev->this_bdev->bd_contains = mdev->this_bdev;
3630
3631 q->backing_dev_info.congested_fn = drbd_congested;
3632 q->backing_dev_info.congested_data = mdev;
3633
Andreas Gruenbacher2f58dcf2010-12-13 17:48:19 +01003634 blk_queue_make_request(q, drbd_make_request);
Lars Ellenberga73ff322012-06-25 19:15:38 +02003635 blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003636	/* Setting max_hw_sectors to an intentionally odd value of 8 KiB here;
 3637	   this triggers a max_bio_size message upon first attach or connect. */
3638 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003639 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
3640 blk_queue_merge_bvec(q, drbd_merge_bvec);
Jens Axboe7eaceac2011-03-10 08:52:07 +01003641 q->queue_lock = &mdev->req_lock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003642
3643 mdev->md_io_page = alloc_page(GFP_KERNEL);
3644 if (!mdev->md_io_page)
3645 goto out_no_io_page;
3646
3647 if (drbd_bm_init(mdev))
3648 goto out_no_bitmap;
3649 /* no need to lock access, we are still initializing this minor device. */
3650 if (!tl_init(mdev))
3651 goto out_no_tl;
3652
3653 mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
3654 if (!mdev->app_reads_hash)
3655 goto out_no_app_reads;
3656
3657 mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
3658 if (!mdev->current_epoch)
3659 goto out_no_epoch;
3660
3661 INIT_LIST_HEAD(&mdev->current_epoch->list);
3662 mdev->epochs = 1;
3663
3664 return mdev;
3665
3666/* out_whatever_else:
3667 kfree(mdev->current_epoch); */
3668out_no_epoch:
3669 kfree(mdev->app_reads_hash);
3670out_no_app_reads:
3671 tl_cleanup(mdev);
3672out_no_tl:
3673 drbd_bm_cleanup(mdev);
3674out_no_bitmap:
3675 __free_page(mdev->md_io_page);
3676out_no_io_page:
3677 put_disk(disk);
3678out_no_disk:
3679 blk_cleanup_queue(q);
3680out_no_q:
3681 free_cpumask_var(mdev->cpu_mask);
3682out_no_cpumask:
3683 kfree(mdev);
3684 return NULL;
3685}
3686
3687/* counterpart of drbd_new_device.
3688 * last part of drbd_delete_device. */
3689void drbd_free_mdev(struct drbd_conf *mdev)
3690{
3691 kfree(mdev->current_epoch);
3692 kfree(mdev->app_reads_hash);
3693 tl_cleanup(mdev);
3694 if (mdev->bitmap) /* should no longer be there. */
3695 drbd_bm_cleanup(mdev);
3696 __free_page(mdev->md_io_page);
3697 put_disk(mdev->vdisk);
3698 blk_cleanup_queue(mdev->rq_queue);
3699 free_cpumask_var(mdev->cpu_mask);
Philipp Reisner37190942010-11-10 12:08:37 +01003700 drbd_free_tl_hash(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003701 kfree(mdev);
3702}
3703
3704
3705int __init drbd_init(void)
3706{
3707 int err;
3708
3709 if (sizeof(struct p_handshake) != 80) {
3710 printk(KERN_ERR
3711 "drbd: never change the size or layout "
3712 "of the HandShake packet.\n");
3713 return -EINVAL;
3714 }
3715
Philipp Reisner2b8a90b2011-01-10 11:15:17 +01003716 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003717 printk(KERN_ERR
3718 "drbd: invalid minor_count (%d)\n", minor_count);
3719#ifdef MODULE
3720 return -EINVAL;
3721#else
3722 minor_count = 8;
3723#endif
3724 }
3725
3726 err = drbd_nl_init();
3727 if (err)
3728 return err;
3729
3730 err = register_blkdev(DRBD_MAJOR, "drbd");
3731 if (err) {
3732 printk(KERN_ERR
3733 "drbd: unable to register block device major %d\n",
3734 DRBD_MAJOR);
3735 return err;
3736 }
3737
3738 register_reboot_notifier(&drbd_notifier);
3739
3740 /*
3741 * allocate all necessary structs
3742 */
3743 err = -ENOMEM;
3744
3745 init_waitqueue_head(&drbd_pp_wait);
3746
3747 drbd_proc = NULL; /* play safe for drbd_cleanup */
3748 minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count,
3749 GFP_KERNEL);
3750 if (!minor_table)
3751 goto Enomem;
3752
3753 err = drbd_create_mempools();
3754 if (err)
3755 goto Enomem;
3756
Lars Ellenberg8c484ee2010-03-11 16:47:58 +01003757 drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003758 if (!drbd_proc) {
3759 printk(KERN_ERR "drbd: unable to register proc file\n");
3760 goto Enomem;
3761 }
3762
3763 rwlock_init(&global_state_lock);
3764
3765 printk(KERN_INFO "drbd: initialized. "
3766 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
3767 API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
3768 printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
3769 printk(KERN_INFO "drbd: registered as block device major %d\n",
3770 DRBD_MAJOR);
3771 printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table);
3772
3773 return 0; /* Success! */
3774
3775Enomem:
3776 drbd_cleanup();
3777 if (err == -ENOMEM)
3778 /* currently always the case */
3779 printk(KERN_ERR "drbd: ran out of memory\n");
3780 else
3781 printk(KERN_ERR "drbd: initialization failure\n");
3782 return err;
3783}
3784
3785void drbd_free_bc(struct drbd_backing_dev *ldev)
3786{
3787 if (ldev == NULL)
3788 return;
3789
Tejun Heoe525fd82010-11-13 11:55:17 +01003790 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3791 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003792
3793 kfree(ldev);
3794}
3795
3796void drbd_free_sock(struct drbd_conf *mdev)
3797{
3798 if (mdev->data.socket) {
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003799 mutex_lock(&mdev->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003800 kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
3801 sock_release(mdev->data.socket);
3802 mdev->data.socket = NULL;
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003803 mutex_unlock(&mdev->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003804 }
3805 if (mdev->meta.socket) {
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003806 mutex_lock(&mdev->meta.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003807 kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
3808 sock_release(mdev->meta.socket);
3809 mdev->meta.socket = NULL;
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003810 mutex_unlock(&mdev->meta.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003811 }
3812}
3813
3814
3815void drbd_free_resources(struct drbd_conf *mdev)
3816{
3817 crypto_free_hash(mdev->csums_tfm);
3818 mdev->csums_tfm = NULL;
3819 crypto_free_hash(mdev->verify_tfm);
3820 mdev->verify_tfm = NULL;
3821 crypto_free_hash(mdev->cram_hmac_tfm);
3822 mdev->cram_hmac_tfm = NULL;
3823 crypto_free_hash(mdev->integrity_w_tfm);
3824 mdev->integrity_w_tfm = NULL;
3825 crypto_free_hash(mdev->integrity_r_tfm);
3826 mdev->integrity_r_tfm = NULL;
3827
3828 drbd_free_sock(mdev);
3829
3830 __no_warn(local,
3831 drbd_free_bc(mdev->ldev);
3832 mdev->ldev = NULL;);
3833}
3834
3835/* meta data management */
3836
3837struct meta_data_on_disk {
3838 u64 la_size; /* last agreed size. */
3839 u64 uuid[UI_SIZE]; /* UUIDs. */
3840 u64 device_uuid;
3841 u64 reserved_u64_1;
3842 u32 flags; /* MDF */
3843 u32 magic;
3844 u32 md_size_sect;
3845 u32 al_offset; /* offset to this block */
3846 u32 al_nr_extents; /* important for restoring the AL */
3847 /* `-- act_log->nr_elements <-- sync_conf.al_extents */
3848 u32 bm_offset; /* offset to the bitmap, from here */
3849 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
Philipp Reisner99432fc2011-05-20 16:39:13 +02003850 u32 la_peer_max_bio_size; /* last peer max_bio_size */
3851 u32 reserved_u32[3];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003852
3853} __packed;
3854
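/*
 * Aside (sketch, not present in the driver): drbd_md_sync() below clears a
 * 512 byte buffer and writes this structure into a single metadata sector,
 * so the layout must never grow past one sector.  A compile-time guard for
 * that invariant could look like this:
 */
static inline void drbd_md_layout_check_sketch(void)
{
	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) > 512);
}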
3855/**
3856 * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
3857 * @mdev: DRBD device.
3858 */
3859void drbd_md_sync(struct drbd_conf *mdev)
3860{
3861 struct meta_data_on_disk *buffer;
3862 sector_t sector;
3863 int i;
3864
Lars Ellenbergee15b032010-09-03 10:00:09 +02003865 del_timer(&mdev->md_sync_timer);
3866 /* timer may be rearmed by drbd_md_mark_dirty() now. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003867 if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
3868 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003869
3870 /* We use here D_FAILED and not D_ATTACHING because we try to write
3871 * metadata even if we detach due to a disk failure! */
3872 if (!get_ldev_if_state(mdev, D_FAILED))
3873 return;
3874
Philipp Reisnere1711732011-06-27 11:51:46 +02003875 buffer = drbd_md_get_buffer(mdev);
3876 if (!buffer)
3877 goto out;
3878
Philipp Reisnerb411b362009-09-25 16:07:19 -07003879 memset(buffer, 0, 512);
3880
3881 buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
3882 for (i = UI_CURRENT; i < UI_SIZE; i++)
3883 buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
3884 buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
3885 buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
3886
3887 buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
3888 buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
3889 buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
3890 buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
3891 buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
3892
3893 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003894 buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003895
3896 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
3897 sector = mdev->ldev->md.md_offset;
3898
Lars Ellenberg3f3a9b82010-09-01 15:12:12 +02003899 if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003900 /* this was a try anyways ... */
3901 dev_err(DEV, "meta data update failed!\n");
Lars Ellenberg383606e2012-06-14 14:21:32 +02003902 drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003903 }
3904
3905 /* Update mdev->ldev->md.la_size_sect,
3906 * since we updated it on metadata. */
3907 mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
3908
Philipp Reisnere1711732011-06-27 11:51:46 +02003909 drbd_md_put_buffer(mdev);
3910out:
Philipp Reisnerb411b362009-09-25 16:07:19 -07003911 put_ldev(mdev);
3912}
3913
3914/**
3915 * drbd_md_read() - Reads in the meta data super block
3916 * @mdev: DRBD device.
3917 * @bdev: Device from which the meta data should be read in.
3918 *
Andreas Gruenbacher116676c2010-12-08 13:33:11 +01003919 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
Philipp Reisnerb411b362009-09-25 16:07:19 -07003920 * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
3921 */
3922int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3923{
3924 struct meta_data_on_disk *buffer;
3925 int i, rv = NO_ERROR;
3926
3927 if (!get_ldev_if_state(mdev, D_ATTACHING))
3928 return ERR_IO_MD_DISK;
3929
Philipp Reisnere1711732011-06-27 11:51:46 +02003930 buffer = drbd_md_get_buffer(mdev);
3931 if (!buffer)
3932 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003933
3934 if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
Lucas De Marchi25985ed2011-03-30 22:57:33 -03003935 /* NOTE: can't do normal error processing here as this is
Philipp Reisnerb411b362009-09-25 16:07:19 -07003936 called BEFORE disk is attached */
3937 dev_err(DEV, "Error while reading metadata.\n");
3938 rv = ERR_IO_MD_DISK;
3939 goto err;
3940 }
3941
3942 if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
3943 dev_err(DEV, "Error while reading metadata, magic not found.\n");
3944 rv = ERR_MD_INVALID;
3945 goto err;
3946 }
3947 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
3948 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
3949 be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
3950 rv = ERR_MD_INVALID;
3951 goto err;
3952 }
3953 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
3954 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
3955 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
3956 rv = ERR_MD_INVALID;
3957 goto err;
3958 }
3959 if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
3960 dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
3961 be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
3962 rv = ERR_MD_INVALID;
3963 goto err;
3964 }
3965
3966 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
3967 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
3968 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
3969 rv = ERR_MD_INVALID;
3970 goto err;
3971 }
3972
3973 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
3974 for (i = UI_CURRENT; i < UI_SIZE; i++)
3975 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
3976 bdev->md.flags = be32_to_cpu(buffer->flags);
3977 mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
3978 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
3979
Philipp Reisner99432fc2011-05-20 16:39:13 +02003980 spin_lock_irq(&mdev->req_lock);
3981 if (mdev->state.conn < C_CONNECTED) {
Lars Ellenbergdb141b22012-06-25 19:15:58 +02003982 unsigned int peer;
Philipp Reisner99432fc2011-05-20 16:39:13 +02003983 peer = be32_to_cpu(buffer->la_peer_max_bio_size);
Lars Ellenbergdb141b22012-06-25 19:15:58 +02003984 peer = max(peer, DRBD_MAX_BIO_SIZE_SAFE);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003985 mdev->peer_max_bio_size = peer;
3986 }
3987 spin_unlock_irq(&mdev->req_lock);
3988
Philipp Reisnerb411b362009-09-25 16:07:19 -07003989 if (mdev->sync_conf.al_extents < 7)
3990 mdev->sync_conf.al_extents = 127;
3991
3992 err:
Philipp Reisnere1711732011-06-27 11:51:46 +02003993 drbd_md_put_buffer(mdev);
3994 out:
Philipp Reisnerb411b362009-09-25 16:07:19 -07003995 put_ldev(mdev);
3996
3997 return rv;
3998}
3999
4000/**
4001 * drbd_md_mark_dirty() - Mark meta data super block as dirty
4002 * @mdev: DRBD device.
4003 *
4004 * Call this function if you change anything that should be written to
4005 * the meta-data super block. This function sets MD_DIRTY, and starts a
 4006 * timer that ensures drbd_md_sync() is called within five seconds.
4007 */
Lars Ellenbergca0e6092010-10-14 15:01:21 +02004008#ifdef DEBUG
Lars Ellenbergee15b032010-09-03 10:00:09 +02004009void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
4010{
4011 if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
4012 mod_timer(&mdev->md_sync_timer, jiffies + HZ);
4013 mdev->last_md_mark_dirty.line = line;
4014 mdev->last_md_mark_dirty.func = func;
4015 }
4016}
4017#else
Philipp Reisnerb411b362009-09-25 16:07:19 -07004018void drbd_md_mark_dirty(struct drbd_conf *mdev)
4019{
Lars Ellenbergee15b032010-09-03 10:00:09 +02004020 if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
Lars Ellenbergca0e6092010-10-14 15:01:21 +02004021 mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004022}
Lars Ellenbergee15b032010-09-03 10:00:09 +02004023#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024
4025static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
4026{
4027 int i;
4028
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004029 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004030 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
Philipp Reisnerb411b362009-09-25 16:07:19 -07004031}
4032
4033void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
4034{
4035 if (idx == UI_CURRENT) {
4036 if (mdev->state.role == R_PRIMARY)
4037 val |= 1;
4038 else
4039 val &= ~((u64)1);
4040
4041 drbd_set_ed_uuid(mdev, val);
4042 }
4043
4044 mdev->ldev->md.uuid[idx] = val;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004045 drbd_md_mark_dirty(mdev);
4046}
4047
4048
4049void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
4050{
4051 if (mdev->ldev->md.uuid[idx]) {
4052 drbd_uuid_move_history(mdev);
4053 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
Philipp Reisnerb411b362009-09-25 16:07:19 -07004054 }
4055 _drbd_uuid_set(mdev, idx, val);
4056}
4057
4058/**
4059 * drbd_uuid_new_current() - Creates a new current UUID
4060 * @mdev: DRBD device.
4061 *
4062 * Creates a new current UUID, and rotates the old current UUID into
4063 * the bitmap slot. Causes an incremental resync upon next connect.
4064 */
4065void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
4066{
4067 u64 val;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004068 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
Philipp Reisnerb411b362009-09-25 16:07:19 -07004069
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004070 if (bm_uuid)
4071 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
4072
Philipp Reisnerb411b362009-09-25 16:07:19 -07004073 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
Philipp Reisnerb411b362009-09-25 16:07:19 -07004074
4075 get_random_bytes(&val, sizeof(u64));
4076 _drbd_uuid_set(mdev, UI_CURRENT, val);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004077 drbd_print_uuids(mdev, "new current UUID");
Lars Ellenbergaaa8e2b2010-10-15 13:16:53 +02004078 /* get it to stable storage _now_ */
4079 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004080}
4081
4082void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
4083{
4084 if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
4085 return;
4086
4087 if (val == 0) {
4088 drbd_uuid_move_history(mdev);
4089 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
4090 mdev->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004091 } else {
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004092 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
4093 if (bm_uuid)
4094 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004095
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004096 mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004097 }
4098 drbd_md_mark_dirty(mdev);
4099}
4100
4101/**
4102 * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
4103 * @mdev: DRBD device.
4104 *
4105 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
4106 */
4107int drbd_bmio_set_n_write(struct drbd_conf *mdev)
4108{
4109 int rv = -EIO;
4110
4111 if (get_ldev_if_state(mdev, D_ATTACHING)) {
4112 drbd_md_set_flag(mdev, MDF_FULL_SYNC);
4113 drbd_md_sync(mdev);
4114 drbd_bm_set_all(mdev);
4115
4116 rv = drbd_bm_write(mdev);
4117
4118 if (!rv) {
4119 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
4120 drbd_md_sync(mdev);
4121 }
4122
4123 put_ldev(mdev);
4124 }
4125
4126 return rv;
4127}
4128
4129/**
4130 * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
4131 * @mdev: DRBD device.
4132 *
4133 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
4134 */
4135int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
4136{
4137 int rv = -EIO;
4138
Philipp Reisner07782862010-08-31 12:00:50 +02004139 drbd_resume_al(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004140 if (get_ldev_if_state(mdev, D_ATTACHING)) {
4141 drbd_bm_clear_all(mdev);
4142 rv = drbd_bm_write(mdev);
4143 put_ldev(mdev);
4144 }
4145
4146 return rv;
4147}
4148
4149static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
4150{
4151 struct bm_io_work *work = container_of(w, struct bm_io_work, w);
Lars Ellenberg02851e92010-12-16 14:47:39 +01004152 int rv = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004153
4154 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
4155
Lars Ellenberg02851e92010-12-16 14:47:39 +01004156 if (get_ldev(mdev)) {
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004157 drbd_bm_lock(mdev, work->why, work->flags);
Lars Ellenberg02851e92010-12-16 14:47:39 +01004158 rv = work->io_fn(mdev);
4159 drbd_bm_unlock(mdev);
4160 put_ldev(mdev);
4161 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004162
4163 clear_bit(BITMAP_IO, &mdev->flags);
Philipp Reisner127b3172010-11-16 10:07:53 +01004164 smp_mb__after_clear_bit();
Philipp Reisnerb411b362009-09-25 16:07:19 -07004165 wake_up(&mdev->misc_wait);
4166
4167 if (work->done)
4168 work->done(mdev, rv);
4169
4170 clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
4171 work->why = NULL;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004172 work->flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004173
4174 return 1;
4175}
4176
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02004177void drbd_ldev_destroy(struct drbd_conf *mdev)
4178{
4179 lc_destroy(mdev->resync);
4180 mdev->resync = NULL;
4181 lc_destroy(mdev->act_log);
4182 mdev->act_log = NULL;
4183 __no_warn(local,
4184 drbd_free_bc(mdev->ldev);
4185 mdev->ldev = NULL;);
4186
4187 if (mdev->md_io_tmpp) {
4188 __free_page(mdev->md_io_tmpp);
4189 mdev->md_io_tmpp = NULL;
4190 }
4191 clear_bit(GO_DISKLESS, &mdev->flags);
4192}
4193
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004194static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
4195{
4196 D_ASSERT(mdev->state.disk == D_FAILED);
Lars Ellenberg9d282872010-10-14 13:57:07 +02004197 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
4198 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02004199 * the protected members anymore, though, so once put_ldev reaches zero
4200 * again, it will be safe to free them. */
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004201 drbd_force_state(mdev, NS(disk, D_DISKLESS));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004202 return 1;
4203}
4204
4205void drbd_go_diskless(struct drbd_conf *mdev)
4206{
4207 D_ASSERT(mdev->state.disk == D_FAILED);
4208 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
Lars Ellenberg9d282872010-10-14 13:57:07 +02004209 drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004210}
4211
Philipp Reisnerb411b362009-09-25 16:07:19 -07004212/**
4213 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
4214 * @mdev: DRBD device.
4215 * @io_fn: IO callback to be called when bitmap IO is possible
4216 * @done: callback to be called after the bitmap IO was performed
4217 * @why: Descriptive text of the reason for doing the IO
4218 *
 4219 * While IO on the bitmap happens we freeze application IO, thus ensuring
 4220 * that drbd_set_out_of_sync() cannot be called. This function MAY ONLY be
4221 * called from worker context. It MUST NOT be used while a previous such
4222 * work is still pending!
4223 */
4224void drbd_queue_bitmap_io(struct drbd_conf *mdev,
4225 int (*io_fn)(struct drbd_conf *),
4226 void (*done)(struct drbd_conf *, int),
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004227 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004228{
4229 D_ASSERT(current == mdev->worker.task);
4230
4231 D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
4232 D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
4233 D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
4234 if (mdev->bm_io_work.why)
4235 dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
4236 why, mdev->bm_io_work.why);
4237
4238 mdev->bm_io_work.io_fn = io_fn;
4239 mdev->bm_io_work.done = done;
4240 mdev->bm_io_work.why = why;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004241 mdev->bm_io_work.flags = flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004242
Philipp Reisner22afd7e2010-11-16 15:30:44 +01004243 spin_lock_irq(&mdev->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004244 set_bit(BITMAP_IO, &mdev->flags);
4245 if (atomic_read(&mdev->ap_bio_cnt) == 0) {
Philipp Reisner127b3172010-11-16 10:07:53 +01004246 if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004247 drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004248 }
Philipp Reisner22afd7e2010-11-16 15:30:44 +01004249 spin_unlock_irq(&mdev->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004250}
4251
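/*
 * Usage sketch (hypothetical call site; the reason string and flag choice
 * are illustrative): queueing a "set all bits and write out" bitmap
 * operation with the helpers defined earlier in this file.  Passing NULL
 * as the done callback is allowed, see w_bitmap_io() above.
 */
static void __maybe_unused drbd_queue_full_sync_sketch(struct drbd_conf *mdev)
{
	/* must be called from worker context, see the restrictions above */
	drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
			     "sketch: set_n_write", BM_LOCKED_SET_ALLOWED);
}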
4252/**
4253 * drbd_bitmap_io() - Does an IO operation on the whole bitmap
4254 * @mdev: DRBD device.
4255 * @io_fn: IO callback to be called when bitmap IO is possible
4256 * @why: Descriptive text of the reason for doing the IO
4257 *
 4258 * Freezes application IO while the actual IO operation runs. This
 4259 * function MAY NOT be called from worker context.
4260 */
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004261int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
4262 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004263{
4264 int rv;
4265
4266 D_ASSERT(current != mdev->worker.task);
4267
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004268 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4269 drbd_suspend_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004270
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004271 drbd_bm_lock(mdev, why, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004272 rv = io_fn(mdev);
4273 drbd_bm_unlock(mdev);
4274
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004275 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4276 drbd_resume_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004277
4278 return rv;
4279}
4280
4281void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
4282{
4283 if ((mdev->ldev->md.flags & flag) != flag) {
4284 drbd_md_mark_dirty(mdev);
4285 mdev->ldev->md.flags |= flag;
4286 }
4287}
4288
4289void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
4290{
4291 if ((mdev->ldev->md.flags & flag) != 0) {
4292 drbd_md_mark_dirty(mdev);
4293 mdev->ldev->md.flags &= ~flag;
4294 }
4295}
4296int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
4297{
4298 return (bdev->md.flags & flag) != 0;
4299}
4300
4301static void md_sync_timer_fn(unsigned long data)
4302{
4303 struct drbd_conf *mdev = (struct drbd_conf *) data;
4304
4305 drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work);
4306}
4307
4308static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
4309{
4310 dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
Lars Ellenbergee15b032010-09-03 10:00:09 +02004311#ifdef DEBUG
4312 dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
4313 mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
4314#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07004315 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004316 return 1;
4317}
4318
4319#ifdef CONFIG_DRBD_FAULT_INJECTION
4320/* Fault insertion support including random number generator shamelessly
4321 * stolen from kernel/rcutorture.c */
4322struct fault_random_state {
4323 unsigned long state;
4324 unsigned long count;
4325};
4326
4327#define FAULT_RANDOM_MULT 39916801 /* prime */
4328#define FAULT_RANDOM_ADD 479001701 /* prime */
4329#define FAULT_RANDOM_REFRESH 10000
4330
4331/*
4332 * Crude but fast random-number generator. Uses a linear congruential
4333 * generator, with occasional help from get_random_bytes().
4334 */
4335static unsigned long
4336_drbd_fault_random(struct fault_random_state *rsp)
4337{
4338 long refresh;
4339
Roel Kluin49829ea2009-12-15 22:55:44 +01004340 if (!rsp->count--) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004341 get_random_bytes(&refresh, sizeof(refresh));
4342 rsp->state += refresh;
4343 rsp->count = FAULT_RANDOM_REFRESH;
4344 }
4345 rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
4346 return swahw32(rsp->state);
4347}
4348
4349static char *
4350_drbd_fault_str(unsigned int type) {
4351 static char *_faults[] = {
4352 [DRBD_FAULT_MD_WR] = "Meta-data write",
4353 [DRBD_FAULT_MD_RD] = "Meta-data read",
4354 [DRBD_FAULT_RS_WR] = "Resync write",
4355 [DRBD_FAULT_RS_RD] = "Resync read",
4356 [DRBD_FAULT_DT_WR] = "Data write",
4357 [DRBD_FAULT_DT_RD] = "Data read",
4358 [DRBD_FAULT_DT_RA] = "Data read ahead",
4359 [DRBD_FAULT_BM_ALLOC] = "BM allocation",
Philipp Reisner6b4388a2010-04-26 14:11:45 +02004360 [DRBD_FAULT_AL_EE] = "EE allocation",
4361 [DRBD_FAULT_RECEIVE] = "receive data corruption",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004362 };
4363
4364 return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
4365}
4366
4367unsigned int
4368_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
4369{
4370 static struct fault_random_state rrs = {0, 0};
4371
4372 unsigned int ret = (
4373 (fault_devs == 0 ||
4374 ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
4375 (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
4376
4377 if (ret) {
4378 fault_count++;
4379
Lars Ellenberg73835062010-05-27 11:51:56 +02004380 if (__ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004381 dev_warn(DEV, "***Simulating %s failure\n",
4382 _drbd_fault_str(type));
4383 }
4384
4385 return ret;
4386}
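/*
 * Usage sketch (hypothetical; the real call sites are spread over the I/O
 * paths): an I/O path consults the fault injection machinery before issuing
 * real I/O, for example:
 */
static int __maybe_unused drbd_should_fail_md_write_sketch(struct drbd_conf *mdev)
{
	return _drbd_insert_fault(mdev, DRBD_FAULT_MD_WR) != 0;
}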
4387#endif
4388
4389const char *drbd_buildtag(void)
4390{
 4391 /* A DRBD module built from external sources carries a reference to the
 4392 git hash of its source code here. */
4393
4394 static char buildtag[38] = "\0uilt-in";
4395
4396 if (buildtag[0] == 0) {
Cong Wangbc4854b2012-04-03 14:13:36 +08004397#ifdef MODULE
4398 sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
4399#else
4400 buildtag[0] = 'b';
Philipp Reisnerb411b362009-09-25 16:07:19 -07004401#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07004402 }
4403
4404 return buildtag;
4405}
4406
4407module_init(drbd_init)
4408module_exit(drbd_cleanup)
4409
Philipp Reisnerb411b362009-09-25 16:07:19 -07004410EXPORT_SYMBOL(drbd_conn_str);
4411EXPORT_SYMBOL(drbd_role_str);
4412EXPORT_SYMBOL(drbd_disk_str);
4413EXPORT_SYMBOL(drbd_set_st_err_str);