blob: c03e87f19e862fe77dc5e30c8a600b004d695079 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
11 from Logicworks, Inc. for making SDP replication support possible.
12
13 drbd is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2, or (at your option)
16 any later version.
17
18 drbd is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with drbd; see the file COPYING. If not, write to
25 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
26
27 */
28
Philipp Reisnerb411b362009-09-25 16:07:19 -070029#include <linux/module.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070030#include <linux/drbd.h>
31#include <asm/uaccess.h>
32#include <asm/types.h>
33#include <net/sock.h>
34#include <linux/ctype.h>
Arnd Bergmann2a48fc02010-06-02 14:28:52 +020035#include <linux/mutex.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070036#include <linux/fs.h>
37#include <linux/file.h>
38#include <linux/proc_fs.h>
39#include <linux/init.h>
40#include <linux/mm.h>
41#include <linux/memcontrol.h>
42#include <linux/mm_inline.h>
43#include <linux/slab.h>
44#include <linux/random.h>
45#include <linux/reboot.h>
46#include <linux/notifier.h>
47#include <linux/kthread.h>
48
49#define __KERNEL_SYSCALLS__
50#include <linux/unistd.h>
51#include <linux/vmalloc.h>
52
53#include <linux/drbd_limits.h>
54#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070055#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
56
57#include "drbd_vli.h"
58
59struct after_state_chg_work {
60 struct drbd_work w;
61 union drbd_state os;
62 union drbd_state ns;
63 enum chg_state_flags flags;
64 struct completion *done;
65};
66
Arnd Bergmann2a48fc02010-06-02 14:28:52 +020067static DEFINE_MUTEX(drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -070068int drbdd_init(struct drbd_thread *);
69int drbd_worker(struct drbd_thread *);
70int drbd_asender(struct drbd_thread *);
71
72int drbd_init(void);
73static int drbd_open(struct block_device *bdev, fmode_t mode);
74static int drbd_release(struct gendisk *gd, fmode_t mode);
75static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
76static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
77 union drbd_state ns, enum chg_state_flags flags);
78static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
79static void md_sync_timer_fn(unsigned long data);
80static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +020081static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
Philipp Reisnerb411b362009-09-25 16:07:19 -070082
Philipp Reisnerb411b362009-09-25 16:07:19 -070083MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
84 "Lars Ellenberg <lars@linbit.com>");
85MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
86MODULE_VERSION(REL_VERSION);
87MODULE_LICENSE("GPL");
Philipp Reisner2b8a90b2011-01-10 11:15:17 +010088MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
89 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
Philipp Reisnerb411b362009-09-25 16:07:19 -070090MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
91
92#include <linux/moduleparam.h>
93/* allow_open_on_secondary */
94MODULE_PARM_DESC(allow_oos, "DONT USE!");
95/* thanks to these macros, if compiled into the kernel (not-module),
96 * this becomes the boot parameter drbd.minor_count */
97module_param(minor_count, uint, 0444);
98module_param(disable_sendpage, bool, 0644);
99module_param(allow_oos, bool, 0);
100module_param(cn_idx, uint, 0444);
101module_param(proc_details, int, 0644);
102
#ifdef CONFIG_DRBD_FAULT_INJECTION
/* fault injection knobs, only built with CONFIG_DRBD_FAULT_INJECTION */
int enable_faults;	/* bitmap of enabled faults */
int fault_rate;		/* fault rate % value - applies to all enabled faults */
static int fault_count;	/* count of faults inserted */
int fault_devs;		/* bitmap of devices to insert faults on */

module_param(enable_faults, int, 0664);
module_param(fault_rate, int, 0664);
module_param(fault_count, int, 0664);
module_param(fault_devs, int, 0644);
#endif
117
118/* module parameter, defined */
Philipp Reisner2b8a90b2011-01-10 11:15:17 +0100119unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
Rusty Russell90ab5ee2012-01-13 09:32:20 +1030120bool disable_sendpage;
121bool allow_oos;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700122unsigned int cn_idx = CN_IDX_DRBD;
123int proc_details; /* Detail level in proc drbd*/
124
125/* Module parameter for setting the user mode helper program
126 * to run. Default is /sbin/drbdadm */
127char usermode_helper[80] = "/sbin/drbdadm";
128
129module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);
130
131/* in 2.6.x, our device mapping and config info contains our virtual gendisks
132 * as member "struct gendisk *vdisk;"
133 */
134struct drbd_conf **minor_table;
135
136struct kmem_cache *drbd_request_cache;
137struct kmem_cache *drbd_ee_cache; /* epoch entries */
138struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
139struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
140mempool_t *drbd_request_mempool;
141mempool_t *drbd_ee_mempool;
Lars Ellenberg42818082011-02-23 12:39:46 +0100142mempool_t *drbd_md_io_page_pool;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700143
144/* I do not use a standard mempool, because:
145 1) I want to hand out the pre-allocated objects first.
146 2) I want to be able to interrupt sleeping allocation with a signal.
147 Note: This is a single linked list, the next pointer is the private
148 member of struct page.
149 */
150struct page *drbd_pp_pool;
151spinlock_t drbd_pp_lock;
152int drbd_pp_vacant;
153wait_queue_head_t drbd_pp_wait;
154
155DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
156
Emese Revfy7d4e9d02009-12-14 00:59:30 +0100157static const struct block_device_operations drbd_ops = {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700158 .owner = THIS_MODULE,
159 .open = drbd_open,
160 .release = drbd_release,
161};
162
#ifdef __CHECKER__
/*
 * Out-of-line variant used only when checking with sparse: as an inline
 * function this produces tons of false positives, as a real function
 * sparse copes.
 *
 * Takes a reference on mdev->local_cnt and returns 1 if the disk state is
 * at least @mins.  Otherwise the reference is dropped again (waking
 * mdev->misc_wait when the count reaches zero) and 0 is returned.
 */
int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
{
	atomic_inc(&mdev->local_cnt);
	if (mdev->state.disk >= mins)
		return 1;

	/* disk state too low: give the reference back */
	if (atomic_dec_and_test(&mdev->local_cnt))
		wake_up(&mdev->misc_wait);
	return 0;
}

#endif
181
182/**
183 * DOC: The transfer log
184 *
185 * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
186 * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
187 * of the list. There is always at least one &struct drbd_tl_epoch object.
188 *
189 * Each &struct drbd_tl_epoch has a circular double linked list of requests
190 * attached.
191 */
192static int tl_init(struct drbd_conf *mdev)
193{
194 struct drbd_tl_epoch *b;
195
196 /* during device minor initialization, we may well use GFP_KERNEL */
197 b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
198 if (!b)
199 return 0;
200 INIT_LIST_HEAD(&b->requests);
201 INIT_LIST_HEAD(&b->w.list);
202 b->next = NULL;
203 b->br_number = 4711;
Philipp Reisner7e602c02010-05-27 14:49:27 +0200204 b->n_writes = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205 b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
206
207 mdev->oldest_tle = b;
208 mdev->newest_tle = b;
209 INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
Philipp Reisner6d7e32f2011-03-15 10:25:18 +0100210 INIT_LIST_HEAD(&mdev->barrier_acked_requests);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700211
212 mdev->tl_hash = NULL;
213 mdev->tl_hash_s = 0;
214
215 return 1;
216}
217
218static void tl_cleanup(struct drbd_conf *mdev)
219{
220 D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
221 D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
222 kfree(mdev->oldest_tle);
223 mdev->oldest_tle = NULL;
224 kfree(mdev->unused_spare_tle);
225 mdev->unused_spare_tle = NULL;
226 kfree(mdev->tl_hash);
227 mdev->tl_hash = NULL;
228 mdev->tl_hash_s = 0;
229}
230
231/**
232 * _tl_add_barrier() - Adds a barrier to the transfer log
233 * @mdev: DRBD device.
234 * @new: Barrier to be added before the current head of the TL.
235 *
236 * The caller must hold the req_lock.
237 */
238void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
239{
240 struct drbd_tl_epoch *newest_before;
241
242 INIT_LIST_HEAD(&new->requests);
243 INIT_LIST_HEAD(&new->w.list);
244 new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
245 new->next = NULL;
Philipp Reisner7e602c02010-05-27 14:49:27 +0200246 new->n_writes = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700247
248 newest_before = mdev->newest_tle;
Lars Ellenbergc088b2d2012-03-23 13:57:13 +0100249 new->br_number = newest_before->br_number+1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700250 if (mdev->newest_tle != new) {
251 mdev->newest_tle->next = new;
252 mdev->newest_tle = new;
253 }
254}
255
256/**
257 * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
258 * @mdev: DRBD device.
259 * @barrier_nr: Expected identifier of the DRBD write barrier packet.
260 * @set_size: Expected number of requests before that barrier.
261 *
262 * In case the passed barrier_nr or set_size does not match the oldest
263 * &struct drbd_tl_epoch objects this function will cause a termination
264 * of the connection.
265 */
266void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
267 unsigned int set_size)
268{
269 struct drbd_tl_epoch *b, *nob; /* next old barrier */
270 struct list_head *le, *tle;
271 struct drbd_request *r;
272
273 spin_lock_irq(&mdev->req_lock);
274
275 b = mdev->oldest_tle;
276
277 /* first some paranoia code */
278 if (b == NULL) {
279 dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
280 barrier_nr);
281 goto bail;
282 }
283 if (b->br_number != barrier_nr) {
284 dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
285 barrier_nr, b->br_number);
286 goto bail;
287 }
Philipp Reisner7e602c02010-05-27 14:49:27 +0200288 if (b->n_writes != set_size) {
289 dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
290 barrier_nr, set_size, b->n_writes);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700291 goto bail;
292 }
293
294 /* Clean up list of requests processed during current epoch */
295 list_for_each_safe(le, tle, &b->requests) {
296 r = list_entry(le, struct drbd_request, tl_requests);
297 _req_mod(r, barrier_acked);
298 }
299 /* There could be requests on the list waiting for completion
300 of the write to the local disk. To avoid corruptions of
301 slab's data structures we have to remove the lists head.
302
303 Also there could have been a barrier ack out of sequence, overtaking
304 the write acks - which would be a bug and violating write ordering.
305 To not deadlock in case we lose connection while such requests are
306 still pending, we need some way to find them for the
307 _req_mode(connection_lost_while_pending).
308
309 These have been list_move'd to the out_of_sequence_requests list in
310 _req_mod(, barrier_acked) above.
311 */
Philipp Reisner6d7e32f2011-03-15 10:25:18 +0100312 list_splice_init(&b->requests, &mdev->barrier_acked_requests);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700313
314 nob = b->next;
315 if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
316 _tl_add_barrier(mdev, b);
317 if (nob)
318 mdev->oldest_tle = nob;
319 /* if nob == NULL b was the only barrier, and becomes the new
320 barrier. Therefore mdev->oldest_tle points already to b */
321 } else {
322 D_ASSERT(nob != NULL);
323 mdev->oldest_tle = nob;
324 kfree(b);
325 }
326
327 spin_unlock_irq(&mdev->req_lock);
328 dec_ap_pending(mdev);
329
330 return;
331
332bail:
333 spin_unlock_irq(&mdev->req_lock);
334 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
335}
336
Philipp Reisner617049a2010-12-22 12:48:31 +0100337
Philipp Reisner11b58e72010-05-12 17:08:26 +0200338/**
339 * _tl_restart() - Walks the transfer log, and applies an action to all requests
340 * @mdev: DRBD device.
341 * @what: The action/event to perform with all request objects
342 *
343 * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
Philipp Reisnerfd2491f2011-07-18 16:25:15 +0200344 * restart_frozen_disk_io.
Philipp Reisner11b58e72010-05-12 17:08:26 +0200345 */
346static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
347{
348 struct drbd_tl_epoch *b, *tmp, **pn;
Philipp Reisnerb9b98712010-06-22 11:26:48 +0200349 struct list_head *le, *tle, carry_reads;
Philipp Reisner11b58e72010-05-12 17:08:26 +0200350 struct drbd_request *req;
351 int rv, n_writes, n_reads;
352
353 b = mdev->oldest_tle;
354 pn = &mdev->oldest_tle;
355 while (b) {
356 n_writes = 0;
357 n_reads = 0;
Philipp Reisnerb9b98712010-06-22 11:26:48 +0200358 INIT_LIST_HEAD(&carry_reads);
Philipp Reisner11b58e72010-05-12 17:08:26 +0200359 list_for_each_safe(le, tle, &b->requests) {
360 req = list_entry(le, struct drbd_request, tl_requests);
361 rv = _req_mod(req, what);
362
363 n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
364 n_reads += (rv & MR_READ) >> MR_READ_SHIFT;
365 }
366 tmp = b->next;
367
Philipp Reisnerb9b98712010-06-22 11:26:48 +0200368 if (n_writes) {
Philipp Reisner11b58e72010-05-12 17:08:26 +0200369 if (what == resend) {
370 b->n_writes = n_writes;
371 if (b->w.cb == NULL) {
372 b->w.cb = w_send_barrier;
373 inc_ap_pending(mdev);
374 set_bit(CREATE_BARRIER, &mdev->flags);
375 }
376
377 drbd_queue_work(&mdev->data.work, &b->w);
378 }
379 pn = &b->next;
380 } else {
Philipp Reisnerb9b98712010-06-22 11:26:48 +0200381 if (n_reads)
382 list_add(&carry_reads, &b->requests);
Philipp Reisner11b58e72010-05-12 17:08:26 +0200383 /* there could still be requests on that ring list,
384 * in case local io is still pending */
385 list_del(&b->requests);
386
387 /* dec_ap_pending corresponding to queue_barrier.
388 * the newest barrier may not have been queued yet,
389 * in which case w.cb is still NULL. */
390 if (b->w.cb != NULL)
391 dec_ap_pending(mdev);
392
393 if (b == mdev->newest_tle) {
394 /* recycle, but reinit! */
395 D_ASSERT(tmp == NULL);
396 INIT_LIST_HEAD(&b->requests);
Philipp Reisnerb9b98712010-06-22 11:26:48 +0200397 list_splice(&carry_reads, &b->requests);
Philipp Reisner11b58e72010-05-12 17:08:26 +0200398 INIT_LIST_HEAD(&b->w.list);
399 b->w.cb = NULL;
400 b->br_number = net_random();
401 b->n_writes = 0;
402
403 *pn = b;
404 break;
405 }
406 *pn = tmp;
407 kfree(b);
408 }
409 b = tmp;
Philipp Reisnerb9b98712010-06-22 11:26:48 +0200410 list_splice(&carry_reads, &b->requests);
Philipp Reisner11b58e72010-05-12 17:08:26 +0200411 }
Philipp Reisner6d7e32f2011-03-15 10:25:18 +0100412
413 /* Actions operating on the disk state, also want to work on
414 requests that got barrier acked. */
415 switch (what) {
Philipp Reisner6d7e32f2011-03-15 10:25:18 +0100416 case fail_frozen_disk_io:
417 case restart_frozen_disk_io:
418 list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
419 req = list_entry(le, struct drbd_request, tl_requests);
420 _req_mod(req, what);
421 }
422
423 case connection_lost_while_pending:
424 case resend:
425 break;
426 default:
427 dev_err(DEV, "what = %d in _tl_restart()\n", what);
428 }
Philipp Reisner11b58e72010-05-12 17:08:26 +0200429}
430
Philipp Reisnerb411b362009-09-25 16:07:19 -0700431
432/**
433 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
434 * @mdev: DRBD device.
435 *
436 * This is called after the connection to the peer was lost. The storage covered
437 * by the requests on the transfer gets marked as our of sync. Called from the
438 * receiver thread and the worker thread.
439 */
440void tl_clear(struct drbd_conf *mdev)
441{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700442 struct list_head *le, *tle;
443 struct drbd_request *r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700444
445 spin_lock_irq(&mdev->req_lock);
446
Philipp Reisner11b58e72010-05-12 17:08:26 +0200447 _tl_restart(mdev, connection_lost_while_pending);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700448
449 /* we expect this list to be empty. */
450 D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
451
452 /* but just in case, clean it up anyways! */
453 list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
454 r = list_entry(le, struct drbd_request, tl_requests);
455 /* It would be nice to complete outside of spinlock.
456 * But this is easier for now. */
457 _req_mod(r, connection_lost_while_pending);
458 }
459
460 /* ensure bit indicating barrier is required is clear */
461 clear_bit(CREATE_BARRIER, &mdev->flags);
462
Philipp Reisner288f4222010-05-27 15:07:43 +0200463 memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
464
Philipp Reisnerb411b362009-09-25 16:07:19 -0700465 spin_unlock_irq(&mdev->req_lock);
466}
467
Philipp Reisner11b58e72010-05-12 17:08:26 +0200468void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
469{
470 spin_lock_irq(&mdev->req_lock);
471 _tl_restart(mdev, what);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700472 spin_unlock_irq(&mdev->req_lock);
473}
474
475/**
Philipp Reisnerfd2491f2011-07-18 16:25:15 +0200476 * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL
477 * @mdev: DRBD device.
478 */
479void tl_abort_disk_io(struct drbd_conf *mdev)
480{
481 struct drbd_tl_epoch *b;
482 struct list_head *le, *tle;
483 struct drbd_request *req;
484
485 spin_lock_irq(&mdev->req_lock);
486 b = mdev->oldest_tle;
487 while (b) {
488 list_for_each_safe(le, tle, &b->requests) {
489 req = list_entry(le, struct drbd_request, tl_requests);
490 if (!(req->rq_state & RQ_LOCAL_PENDING))
491 continue;
492 _req_mod(req, abort_disk_io);
493 }
494 b = b->next;
495 }
496
497 list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
498 req = list_entry(le, struct drbd_request, tl_requests);
499 if (!(req->rq_state & RQ_LOCAL_PENDING))
500 continue;
501 _req_mod(req, abort_disk_io);
502 }
503
504 spin_unlock_irq(&mdev->req_lock);
505}
506
507/**
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100508 * cl_wide_st_chg() - true if the state change is a cluster wide one
Philipp Reisnerb411b362009-09-25 16:07:19 -0700509 * @mdev: DRBD device.
510 * @os: old (current) state.
511 * @ns: new (wanted) state.
512 */
513static int cl_wide_st_chg(struct drbd_conf *mdev,
514 union drbd_state os, union drbd_state ns)
515{
516 return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
517 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
518 (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
519 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
Philipp Reisner02ee8f92011-03-14 11:54:47 +0100520 (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
Philipp Reisnerb411b362009-09-25 16:07:19 -0700521 (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
522 (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
523}
524
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100525enum drbd_state_rv
526drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
527 union drbd_state mask, union drbd_state val)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700528{
529 unsigned long flags;
530 union drbd_state os, ns;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100531 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700532
533 spin_lock_irqsave(&mdev->req_lock, flags);
534 os = mdev->state;
535 ns.i = (os.i & ~mask.i) | val.i;
536 rv = _drbd_set_state(mdev, ns, f, NULL);
537 ns = mdev->state;
538 spin_unlock_irqrestore(&mdev->req_lock, flags);
539
540 return rv;
541}
542
543/**
544 * drbd_force_state() - Impose a change which happens outside our control on our state
545 * @mdev: DRBD device.
546 * @mask: mask of state bits to change.
547 * @val: value of new state bits.
548 */
549void drbd_force_state(struct drbd_conf *mdev,
550 union drbd_state mask, union drbd_state val)
551{
552 drbd_change_state(mdev, CS_HARD, mask, val);
553}
554
/* state validation helpers, defined further down */
static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
						    union drbd_state,
						    union drbd_state);

/* warning codes sanitize_state() can report through its warn argument;
 * print_sanitize_warnings() maps them to messages */
enum sanitize_state_warnings {
	NO_WARNING,
	ABORTED_ONLINE_VERIFY,
	ABORTED_RESYNC,
	CONNECTION_LOST_NEGOTIATING,
	IMPLICITLY_UPGRADED_DISK,
	IMPLICITLY_UPGRADED_PDSK,
};

static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
				       union drbd_state ns, enum sanitize_state_warnings *warn);
int drbd_send_state_req(struct drbd_conf *,
			union drbd_state, union drbd_state);
571
Andreas Gruenbacherc8b32562010-12-08 01:06:16 +0100572static enum drbd_state_rv
573_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
574 union drbd_state val)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700575{
576 union drbd_state os, ns;
577 unsigned long flags;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100578 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700579
580 if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
581 return SS_CW_SUCCESS;
582
583 if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
584 return SS_CW_FAILED_BY_PEER;
585
586 rv = 0;
587 spin_lock_irqsave(&mdev->req_lock, flags);
588 os = mdev->state;
589 ns.i = (os.i & ~mask.i) | val.i;
590 ns = sanitize_state(mdev, os, ns, NULL);
591
592 if (!cl_wide_st_chg(mdev, os, ns))
593 rv = SS_CW_NO_NEED;
594 if (!rv) {
595 rv = is_valid_state(mdev, ns);
596 if (rv == SS_SUCCESS) {
597 rv = is_valid_state_transition(mdev, ns, os);
598 if (rv == SS_SUCCESS)
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100599 rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700600 }
601 }
602 spin_unlock_irqrestore(&mdev->req_lock, flags);
603
604 return rv;
605}
606
607/**
608 * drbd_req_state() - Perform an eventually cluster wide state change
609 * @mdev: DRBD device.
610 * @mask: mask of state bits to change.
611 * @val: value of new state bits.
612 * @f: flags
613 *
614 * Should not be called directly, use drbd_request_state() or
615 * _drbd_request_state().
616 */
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100617static enum drbd_state_rv
618drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
619 union drbd_state val, enum chg_state_flags f)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700620{
621 struct completion done;
622 unsigned long flags;
623 union drbd_state os, ns;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100624 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700625
626 init_completion(&done);
627
628 if (f & CS_SERIALIZE)
629 mutex_lock(&mdev->state_mutex);
630
631 spin_lock_irqsave(&mdev->req_lock, flags);
632 os = mdev->state;
633 ns.i = (os.i & ~mask.i) | val.i;
634 ns = sanitize_state(mdev, os, ns, NULL);
635
636 if (cl_wide_st_chg(mdev, os, ns)) {
637 rv = is_valid_state(mdev, ns);
638 if (rv == SS_SUCCESS)
639 rv = is_valid_state_transition(mdev, ns, os);
640 spin_unlock_irqrestore(&mdev->req_lock, flags);
641
642 if (rv < SS_SUCCESS) {
643 if (f & CS_VERBOSE)
644 print_st_err(mdev, os, ns, rv);
645 goto abort;
646 }
647
648 drbd_state_lock(mdev);
649 if (!drbd_send_state_req(mdev, mask, val)) {
650 drbd_state_unlock(mdev);
651 rv = SS_CW_FAILED_BY_PEER;
652 if (f & CS_VERBOSE)
653 print_st_err(mdev, os, ns, rv);
654 goto abort;
655 }
656
657 wait_event(mdev->state_wait,
658 (rv = _req_st_cond(mdev, mask, val)));
659
660 if (rv < SS_SUCCESS) {
661 drbd_state_unlock(mdev);
662 if (f & CS_VERBOSE)
663 print_st_err(mdev, os, ns, rv);
664 goto abort;
665 }
666 spin_lock_irqsave(&mdev->req_lock, flags);
667 os = mdev->state;
668 ns.i = (os.i & ~mask.i) | val.i;
669 rv = _drbd_set_state(mdev, ns, f, &done);
670 drbd_state_unlock(mdev);
671 } else {
672 rv = _drbd_set_state(mdev, ns, f, &done);
673 }
674
675 spin_unlock_irqrestore(&mdev->req_lock, flags);
676
677 if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
678 D_ASSERT(current != mdev->worker.task);
679 wait_for_completion(&done);
680 }
681
682abort:
683 if (f & CS_SERIALIZE)
684 mutex_unlock(&mdev->state_mutex);
685
686 return rv;
687}
688
689/**
690 * _drbd_request_state() - Request a state change (with flags)
691 * @mdev: DRBD device.
692 * @mask: mask of state bits to change.
693 * @val: value of new state bits.
694 * @f: flags
695 *
696 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
697 * flag, or when logging of failed state change requests is not desired.
698 */
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100699enum drbd_state_rv
700_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
701 union drbd_state val, enum chg_state_flags f)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700702{
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100703 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700704
705 wait_event(mdev->state_wait,
706 (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
707
708 return rv;
709}
710
711static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
712{
713 dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
714 name,
715 drbd_conn_str(ns.conn),
716 drbd_role_str(ns.role),
717 drbd_role_str(ns.peer),
718 drbd_disk_str(ns.disk),
719 drbd_disk_str(ns.pdsk),
Philipp Reisnerfb22c402010-09-08 23:20:21 +0200720 is_susp(ns) ? 's' : 'r',
Philipp Reisnerb411b362009-09-25 16:07:19 -0700721 ns.aftr_isp ? 'a' : '-',
722 ns.peer_isp ? 'p' : '-',
723 ns.user_isp ? 'u' : '-'
724 );
725}
726
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100727void print_st_err(struct drbd_conf *mdev, union drbd_state os,
728 union drbd_state ns, enum drbd_state_rv err)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700729{
730 if (err == SS_IN_TRANSIENT_STATE)
731 return;
732 dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
733 print_st(mdev, " state", os);
734 print_st(mdev, "wanted", ns);
735}
736
737
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738/**
739 * is_valid_state() - Returns an SS_ error code if ns is not valid
740 * @mdev: DRBD device.
741 * @ns: State to consider.
742 */
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100743static enum drbd_state_rv
744is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745{
746 /* See drbd_state_sw_errors in drbd_strings.c */
747
748 enum drbd_fencing_p fp;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100749 enum drbd_state_rv rv = SS_SUCCESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700750
751 fp = FP_DONT_CARE;
752 if (get_ldev(mdev)) {
753 fp = mdev->ldev->dc.fencing;
754 put_ldev(mdev);
755 }
756
757 if (get_net_conf(mdev)) {
758 if (!mdev->net_conf->two_primaries &&
759 ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
760 rv = SS_TWO_PRIMARIES;
761 put_net_conf(mdev);
762 }
763
764 if (rv <= 0)
765 /* already found a reason to abort */;
766 else if (ns.role == R_SECONDARY && mdev->open_cnt)
767 rv = SS_DEVICE_IN_USE;
768
769 else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
770 rv = SS_NO_UP_TO_DATE_DISK;
771
772 else if (fp >= FP_RESOURCE &&
773 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
774 rv = SS_PRIMARY_NOP;
775
776 else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
777 rv = SS_NO_UP_TO_DATE_DISK;
778
779 else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
780 rv = SS_NO_LOCAL_DISK;
781
782 else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
783 rv = SS_NO_REMOTE_DISK;
784
Lars Ellenberg8d4ce822010-04-01 16:59:32 +0200785 else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
786 rv = SS_NO_UP_TO_DATE_DISK;
787
Philipp Reisnerb411b362009-09-25 16:07:19 -0700788 else if ((ns.conn == C_CONNECTED ||
789 ns.conn == C_WF_BITMAP_S ||
790 ns.conn == C_SYNC_SOURCE ||
791 ns.conn == C_PAUSED_SYNC_S) &&
792 ns.disk == D_OUTDATED)
793 rv = SS_CONNECTED_OUTDATES;
794
795 else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
796 (mdev->sync_conf.verify_alg[0] == 0))
797 rv = SS_NO_VERIFY_ALG;
798
799 else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
800 mdev->agreed_pro_version < 88)
801 rv = SS_NOT_SUPPORTED;
802
Philipp Reisnerfa7d9392011-05-17 14:48:55 +0200803 else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
804 rv = SS_CONNECTED_OUTDATES;
805
Philipp Reisnerb411b362009-09-25 16:07:19 -0700806 return rv;
807}
808
809/**
810 * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
811 * @mdev: DRBD device.
812 * @ns: new state.
813 * @os: old state.
814 */
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100815static enum drbd_state_rv
816is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
817 union drbd_state os)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700818{
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +0100819 enum drbd_state_rv rv = SS_SUCCESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700820
821 if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
822 os.conn > C_CONNECTED)
823 rv = SS_RESYNC_RUNNING;
824
825 if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
826 rv = SS_ALREADY_STANDALONE;
827
828 if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
829 rv = SS_IS_DISKLESS;
830
831 if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
832 rv = SS_NO_NET_CONFIG;
833
834 if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
835 rv = SS_LOWER_THAN_OUTDATED;
836
837 if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
838 rv = SS_IN_TRANSIENT_STATE;
839
840 if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
841 rv = SS_IN_TRANSIENT_STATE;
842
Philipp Reisner197296f2012-03-26 16:47:11 +0200843 /* While establishing a connection only allow cstate to change.
844 Delay/refuse role changes, detach attach etc... */
845 if (test_bit(STATE_SENT, &mdev->flags) &&
846 !(os.conn == C_WF_REPORT_PARAMS ||
847 (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
848 rv = SS_IN_TRANSIENT_STATE;
849
Philipp Reisnerb411b362009-09-25 16:07:19 -0700850 if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
851 rv = SS_NEED_CONNECTION;
852
853 if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
854 ns.conn != os.conn && os.conn > C_CONNECTED)
855 rv = SS_RESYNC_RUNNING;
856
857 if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
858 os.conn < C_CONNECTED)
859 rv = SS_NEED_CONNECTION;
860
Philipp Reisner1fc80cf2010-11-22 14:18:47 +0100861 if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
862 && os.conn < C_WF_REPORT_PARAMS)
863 rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
864
Philipp Reisnerb411b362009-09-25 16:07:19 -0700865 return rv;
866}
867
Philipp Reisner77e8fdf2011-06-29 10:49:13 +0200868static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
869{
870 static const char *msg_table[] = {
871 [NO_WARNING] = "",
872 [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
873 [ABORTED_RESYNC] = "Resync aborted.",
874 [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
875 [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
876 [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
877 };
878
879 if (warn != NO_WARNING)
880 dev_warn(DEV, "%s\n", msg_table[warn]);
881}
882
Philipp Reisnerb411b362009-09-25 16:07:19 -0700883/**
884 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
885 * @mdev: DRBD device.
886 * @os: old state.
887 * @ns: new state.
888 * @warn_sync_abort:
889 *
890 * When we loose connection, we have to set the state of the peers disk (pdsk)
891 * to D_UNKNOWN. This rule and many more along those lines are in this function.
892 */
893static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
Philipp Reisner77e8fdf2011-06-29 10:49:13 +0200894 union drbd_state ns, enum sanitize_state_warnings *warn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700895{
896 enum drbd_fencing_p fp;
Philipp Reisnerab17b68f2010-11-17 16:54:36 +0100897 enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700898
Philipp Reisner77e8fdf2011-06-29 10:49:13 +0200899 if (warn)
900 *warn = NO_WARNING;
901
Philipp Reisnerb411b362009-09-25 16:07:19 -0700902 fp = FP_DONT_CARE;
903 if (get_ldev(mdev)) {
904 fp = mdev->ldev->dc.fencing;
905 put_ldev(mdev);
906 }
907
908 /* Disallow Network errors to configure a device's network part */
909 if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
910 os.conn <= C_DISCONNECTING)
911 ns.conn = os.conn;
912
Lars Ellenbergf2906e12010-07-21 17:04:32 +0200913 /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
914 * If you try to go into some Sync* state, that shall fail (elsewhere). */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700915 if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
Lars Ellenberg545752d2011-12-05 14:39:25 +0100916 ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700917 ns.conn = os.conn;
918
Lars Ellenberg82f59cc2010-10-16 12:13:47 +0200919 /* we cannot fail (again) if we already detached */
920 if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
921 ns.disk = D_DISKLESS;
922
Philipp Reisnerb411b362009-09-25 16:07:19 -0700923 /* After C_DISCONNECTING only C_STANDALONE may follow */
924 if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
925 ns.conn = os.conn;
926
927 if (ns.conn < C_CONNECTED) {
928 ns.peer_isp = 0;
929 ns.peer = R_UNKNOWN;
930 if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
931 ns.pdsk = D_UNKNOWN;
932 }
933
934 /* Clear the aftr_isp when becoming unconfigured */
935 if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
936 ns.aftr_isp = 0;
937
Philipp Reisnerb411b362009-09-25 16:07:19 -0700938 /* Abort resync if a disk fails/detaches */
939 if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
940 (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
Philipp Reisner77e8fdf2011-06-29 10:49:13 +0200941 if (warn)
942 *warn = os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
943 ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700944 ns.conn = C_CONNECTED;
945 }
946
Philipp Reisnerb411b362009-09-25 16:07:19 -0700947 /* Connection breaks down before we finished "Negotiating" */
948 if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
949 get_ldev_if_state(mdev, D_NEGOTIATING)) {
950 if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
951 ns.disk = mdev->new_state_tmp.disk;
952 ns.pdsk = mdev->new_state_tmp.pdsk;
953 } else {
Philipp Reisner77e8fdf2011-06-29 10:49:13 +0200954 if (warn)
955 *warn = CONNECTION_LOST_NEGOTIATING;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700956 ns.disk = D_DISKLESS;
957 ns.pdsk = D_UNKNOWN;
958 }
959 put_ldev(mdev);
960 }
961
Philipp Reisnerab17b68f2010-11-17 16:54:36 +0100962 /* D_CONSISTENT and D_OUTDATED vanish when we get connected */
963 if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
964 if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
965 ns.disk = D_UP_TO_DATE;
966 if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
967 ns.pdsk = D_UP_TO_DATE;
968 }
969
970 /* Implications of the connection stat on the disk states */
971 disk_min = D_DISKLESS;
972 disk_max = D_UP_TO_DATE;
973 pdsk_min = D_INCONSISTENT;
974 pdsk_max = D_UNKNOWN;
975 switch ((enum drbd_conns)ns.conn) {
976 case C_WF_BITMAP_T:
977 case C_PAUSED_SYNC_T:
978 case C_STARTING_SYNC_T:
979 case C_WF_SYNC_UUID:
980 case C_BEHIND:
981 disk_min = D_INCONSISTENT;
982 disk_max = D_OUTDATED;
983 pdsk_min = D_UP_TO_DATE;
984 pdsk_max = D_UP_TO_DATE;
985 break;
986 case C_VERIFY_S:
987 case C_VERIFY_T:
988 disk_min = D_UP_TO_DATE;
989 disk_max = D_UP_TO_DATE;
990 pdsk_min = D_UP_TO_DATE;
991 pdsk_max = D_UP_TO_DATE;
992 break;
993 case C_CONNECTED:
994 disk_min = D_DISKLESS;
995 disk_max = D_UP_TO_DATE;
996 pdsk_min = D_DISKLESS;
997 pdsk_max = D_UP_TO_DATE;
998 break;
999 case C_WF_BITMAP_S:
1000 case C_PAUSED_SYNC_S:
1001 case C_STARTING_SYNC_S:
1002 case C_AHEAD:
1003 disk_min = D_UP_TO_DATE;
1004 disk_max = D_UP_TO_DATE;
1005 pdsk_min = D_INCONSISTENT;
1006 pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
1007 break;
1008 case C_SYNC_TARGET:
1009 disk_min = D_INCONSISTENT;
1010 disk_max = D_INCONSISTENT;
1011 pdsk_min = D_UP_TO_DATE;
1012 pdsk_max = D_UP_TO_DATE;
1013 break;
1014 case C_SYNC_SOURCE:
1015 disk_min = D_UP_TO_DATE;
1016 disk_max = D_UP_TO_DATE;
1017 pdsk_min = D_INCONSISTENT;
1018 pdsk_max = D_INCONSISTENT;
1019 break;
1020 case C_STANDALONE:
1021 case C_DISCONNECTING:
1022 case C_UNCONNECTED:
1023 case C_TIMEOUT:
1024 case C_BROKEN_PIPE:
1025 case C_NETWORK_FAILURE:
1026 case C_PROTOCOL_ERROR:
1027 case C_TEAR_DOWN:
1028 case C_WF_CONNECTION:
1029 case C_WF_REPORT_PARAMS:
1030 case C_MASK:
1031 break;
1032 }
1033 if (ns.disk > disk_max)
1034 ns.disk = disk_max;
1035
1036 if (ns.disk < disk_min) {
Philipp Reisner77e8fdf2011-06-29 10:49:13 +02001037 if (warn)
1038 *warn = IMPLICITLY_UPGRADED_DISK;
Philipp Reisnerab17b68f2010-11-17 16:54:36 +01001039 ns.disk = disk_min;
1040 }
1041 if (ns.pdsk > pdsk_max)
1042 ns.pdsk = pdsk_max;
1043
1044 if (ns.pdsk < pdsk_min) {
Philipp Reisner77e8fdf2011-06-29 10:49:13 +02001045 if (warn)
1046 *warn = IMPLICITLY_UPGRADED_PDSK;
Philipp Reisnerab17b68f2010-11-17 16:54:36 +01001047 ns.pdsk = pdsk_min;
1048 }
1049
Philipp Reisnerb411b362009-09-25 16:07:19 -07001050 if (fp == FP_STONITH &&
Philipp Reisner0a492162009-10-21 13:08:29 +02001051 (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
1052 !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001053 ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
Philipp Reisner265be2d2010-05-31 10:14:17 +02001054
1055 if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
1056 (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
1057 !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001058 ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001059
1060 if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
1061 if (ns.conn == C_SYNC_SOURCE)
1062 ns.conn = C_PAUSED_SYNC_S;
1063 if (ns.conn == C_SYNC_TARGET)
1064 ns.conn = C_PAUSED_SYNC_T;
1065 } else {
1066 if (ns.conn == C_PAUSED_SYNC_S)
1067 ns.conn = C_SYNC_SOURCE;
1068 if (ns.conn == C_PAUSED_SYNC_T)
1069 ns.conn = C_SYNC_TARGET;
1070 }
1071
1072 return ns;
1073}
1074
1075/* helper for __drbd_set_state */
1076static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
1077{
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001078 if (mdev->agreed_pro_version < 90)
1079 mdev->ov_start_sector = 0;
1080 mdev->rs_total = drbd_bm_bits(mdev);
1081 mdev->ov_position = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001082 if (cs == C_VERIFY_T) {
1083 /* starting online verify from an arbitrary position
1084 * does not fit well into the existing protocol.
1085 * on C_VERIFY_T, we initialize ov_left and friends
1086 * implicitly in receive_DataRequest once the
1087 * first P_OV_REQUEST is received */
1088 mdev->ov_start_sector = ~(sector_t)0;
1089 } else {
1090 unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001091 if (bit >= mdev->rs_total) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001092 mdev->ov_start_sector =
1093 BM_BIT_TO_SECT(mdev->rs_total - 1);
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001094 mdev->rs_total = 1;
1095 } else
1096 mdev->rs_total -= bit;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001097 mdev->ov_position = mdev->ov_start_sector;
1098 }
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001099 mdev->ov_left = mdev->rs_total;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001100}
1101
Philipp Reisner07782862010-08-31 12:00:50 +02001102static void drbd_resume_al(struct drbd_conf *mdev)
1103{
1104 if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
1105 dev_info(DEV, "Resumed AL updates\n");
1106}
1107
Philipp Reisnerb411b362009-09-25 16:07:19 -07001108/**
1109 * __drbd_set_state() - Set a new DRBD state
1110 * @mdev: DRBD device.
1111 * @ns: new state.
1112 * @flags: Flags
1113 * @done: Optional completion, that will get completed after the after_state_ch() finished
1114 *
1115 * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
1116 */
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01001117enum drbd_state_rv
1118__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1119 enum chg_state_flags flags, struct completion *done)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001120{
1121 union drbd_state os;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01001122 enum drbd_state_rv rv = SS_SUCCESS;
Philipp Reisner77e8fdf2011-06-29 10:49:13 +02001123 enum sanitize_state_warnings ssw;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001124 struct after_state_chg_work *ascw;
1125
1126 os = mdev->state;
1127
Philipp Reisner77e8fdf2011-06-29 10:49:13 +02001128 ns = sanitize_state(mdev, os, ns, &ssw);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001129
1130 if (ns.i == os.i)
1131 return SS_NOTHING_TO_DO;
1132
1133 if (!(flags & CS_HARD)) {
1134 /* pre-state-change checks ; only look at ns */
1135 /* See drbd_state_sw_errors in drbd_strings.c */
1136
1137 rv = is_valid_state(mdev, ns);
1138 if (rv < SS_SUCCESS) {
1139 /* If the old state was illegal as well, then let
1140 this happen...*/
1141
Philipp Reisner1616a252010-06-10 16:55:15 +02001142 if (is_valid_state(mdev, os) == rv)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001143 rv = is_valid_state_transition(mdev, ns, os);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144 } else
1145 rv = is_valid_state_transition(mdev, ns, os);
1146 }
1147
1148 if (rv < SS_SUCCESS) {
1149 if (flags & CS_VERBOSE)
1150 print_st_err(mdev, os, ns, rv);
1151 return rv;
1152 }
1153
Philipp Reisner77e8fdf2011-06-29 10:49:13 +02001154 print_sanitize_warnings(mdev, ssw);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001155
1156 {
Andreas Gruenbacher662d91a2010-12-07 03:01:41 +01001157 char *pbp, pb[300];
1158 pbp = pb;
1159 *pbp = 0;
1160 if (ns.role != os.role)
1161 pbp += sprintf(pbp, "role( %s -> %s ) ",
1162 drbd_role_str(os.role),
1163 drbd_role_str(ns.role));
1164 if (ns.peer != os.peer)
1165 pbp += sprintf(pbp, "peer( %s -> %s ) ",
1166 drbd_role_str(os.peer),
1167 drbd_role_str(ns.peer));
1168 if (ns.conn != os.conn)
1169 pbp += sprintf(pbp, "conn( %s -> %s ) ",
1170 drbd_conn_str(os.conn),
1171 drbd_conn_str(ns.conn));
1172 if (ns.disk != os.disk)
1173 pbp += sprintf(pbp, "disk( %s -> %s ) ",
1174 drbd_disk_str(os.disk),
1175 drbd_disk_str(ns.disk));
1176 if (ns.pdsk != os.pdsk)
1177 pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
1178 drbd_disk_str(os.pdsk),
1179 drbd_disk_str(ns.pdsk));
1180 if (is_susp(ns) != is_susp(os))
1181 pbp += sprintf(pbp, "susp( %d -> %d ) ",
1182 is_susp(os),
1183 is_susp(ns));
1184 if (ns.aftr_isp != os.aftr_isp)
1185 pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
1186 os.aftr_isp,
1187 ns.aftr_isp);
1188 if (ns.peer_isp != os.peer_isp)
1189 pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
1190 os.peer_isp,
1191 ns.peer_isp);
1192 if (ns.user_isp != os.user_isp)
1193 pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
1194 os.user_isp,
1195 ns.user_isp);
1196 dev_info(DEV, "%s\n", pb);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197 }
1198
1199 /* solve the race between becoming unconfigured,
1200 * worker doing the cleanup, and
1201 * admin reconfiguring us:
1202 * on (re)configure, first set CONFIG_PENDING,
1203 * then wait for a potentially exiting worker,
1204 * start the worker, and schedule one no_op.
1205 * then proceed with configuration.
1206 */
1207 if (ns.disk == D_DISKLESS &&
1208 ns.conn == C_STANDALONE &&
1209 ns.role == R_SECONDARY &&
1210 !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
1211 set_bit(DEVICE_DYING, &mdev->flags);
1212
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001213 /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
1214 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
1215 * drbd_ldev_destroy() won't happen before our corresponding
1216 * after_state_ch works run, where we put_ldev again. */
1217 if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
1218 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
1219 atomic_inc(&mdev->local_cnt);
1220
1221 mdev->state = ns;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01001222
1223 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
1224 drbd_print_uuids(mdev, "attached to UUIDs");
1225
Philipp Reisnerb411b362009-09-25 16:07:19 -07001226 wake_up(&mdev->misc_wait);
1227 wake_up(&mdev->state_wait);
1228
Philipp Reisnerb411b362009-09-25 16:07:19 -07001229 /* aborted verify run. log the last position */
1230 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
1231 ns.conn < C_CONNECTED) {
1232 mdev->ov_start_sector =
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001233 BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001234 dev_info(DEV, "Online Verify reached sector %llu\n",
1235 (unsigned long long)mdev->ov_start_sector);
1236 }
1237
1238 if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
1239 (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) {
1240 dev_info(DEV, "Syncer continues.\n");
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001241 mdev->rs_paused += (long)jiffies
1242 -(long)mdev->rs_mark_time[mdev->rs_last_mark];
Philipp Reisner63106d32010-09-01 15:47:15 +02001243 if (ns.conn == C_SYNC_TARGET)
1244 mod_timer(&mdev->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001245 }
1246
1247 if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) &&
1248 (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
1249 dev_info(DEV, "Resync suspended\n");
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001250 mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001251 }
1252
1253 if (os.conn == C_CONNECTED &&
1254 (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001255 unsigned long now = jiffies;
1256 int i;
1257
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001258 set_ov_position(mdev, ns.conn);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001259 mdev->rs_start = now;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001260 mdev->rs_last_events = 0;
1261 mdev->rs_last_sect_ev = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262 mdev->ov_last_oos_size = 0;
1263 mdev->ov_last_oos_start = 0;
1264
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001265 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Lars Ellenberg30b743a2010-11-05 09:39:06 +01001266 mdev->rs_mark_left[i] = mdev->ov_left;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001267 mdev->rs_mark_time[i] = now;
1268 }
1269
Lars Ellenberg2649f082010-11-05 10:05:47 +01001270 drbd_rs_controller_reset(mdev);
1271
Philipp Reisnerb411b362009-09-25 16:07:19 -07001272 if (ns.conn == C_VERIFY_S) {
1273 dev_info(DEV, "Starting Online Verify from sector %llu\n",
1274 (unsigned long long)mdev->ov_position);
1275 mod_timer(&mdev->resync_timer, jiffies);
1276 }
1277 }
1278
1279 if (get_ldev(mdev)) {
1280 u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
1281 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
1282 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
1283
1284 if (test_bit(CRASHED_PRIMARY, &mdev->flags))
1285 mdf |= MDF_CRASHED_PRIMARY;
1286 if (mdev->state.role == R_PRIMARY ||
1287 (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
1288 mdf |= MDF_PRIMARY_IND;
1289 if (mdev->state.conn > C_WF_REPORT_PARAMS)
1290 mdf |= MDF_CONNECTED_IND;
1291 if (mdev->state.disk > D_INCONSISTENT)
1292 mdf |= MDF_CONSISTENT;
1293 if (mdev->state.disk > D_OUTDATED)
1294 mdf |= MDF_WAS_UP_TO_DATE;
1295 if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
1296 mdf |= MDF_PEER_OUT_DATED;
1297 if (mdf != mdev->ldev->md.flags) {
1298 mdev->ldev->md.flags = mdf;
1299 drbd_md_mark_dirty(mdev);
1300 }
1301 if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
1302 drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
1303 put_ldev(mdev);
1304 }
1305
1306 /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
1307 if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
1308 os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
1309 set_bit(CONSIDER_RESYNC, &mdev->flags);
1310
1311 /* Receiver should clean up itself */
1312 if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
1313 drbd_thread_stop_nowait(&mdev->receiver);
1314
1315 /* Now the receiver finished cleaning up itself, it should die */
1316 if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
1317 drbd_thread_stop_nowait(&mdev->receiver);
1318
1319 /* Upon network failure, we need to restart the receiver. */
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001320 if (os.conn > C_WF_CONNECTION &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07001321 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
1322 drbd_thread_restart_nowait(&mdev->receiver);
1323
Philipp Reisner07782862010-08-31 12:00:50 +02001324 /* Resume AL writing if we get a connection */
1325 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1326 drbd_resume_al(mdev);
1327
Lars Ellenbergba280c02012-04-25 11:46:14 +02001328 /* remember last connect and attach times so request_timer_fn() won't
1329 * kill newly established sessions while we are still trying to thaw
1330 * previously frozen IO */
1331 if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS)
1332 mdev->last_reconnect_jif = jiffies;
1333 if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
1334 ns.disk > D_NEGOTIATING)
1335 mdev->last_reattach_jif = jiffies;
1336
Philipp Reisnerb411b362009-09-25 16:07:19 -07001337 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
1338 if (ascw) {
1339 ascw->os = os;
1340 ascw->ns = ns;
1341 ascw->flags = flags;
1342 ascw->w.cb = w_after_state_ch;
1343 ascw->done = done;
1344 drbd_queue_work(&mdev->data.work, &ascw->w);
1345 } else {
1346 dev_warn(DEV, "Could not kmalloc an ascw\n");
1347 }
1348
1349 return rv;
1350}
1351
1352static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1353{
1354 struct after_state_chg_work *ascw =
1355 container_of(w, struct after_state_chg_work, w);
1356 after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
1357 if (ascw->flags & CS_WAIT_COMPLETE) {
1358 D_ASSERT(ascw->done != NULL);
1359 complete(ascw->done);
1360 }
1361 kfree(ascw);
1362
1363 return 1;
1364}
1365
1366static void abw_start_sync(struct drbd_conf *mdev, int rv)
1367{
1368 if (rv) {
1369 dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
1370 _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
1371 return;
1372 }
1373
1374 switch (mdev->state.conn) {
1375 case C_STARTING_SYNC_T:
1376 _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
1377 break;
1378 case C_STARTING_SYNC_S:
1379 drbd_start_resync(mdev, C_SYNC_SOURCE);
1380 break;
1381 }
1382}
1383
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001384int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
1385 int (*io_fn)(struct drbd_conf *),
1386 char *why, enum bm_flag flags)
Lars Ellenberg19f843a2010-12-15 08:59:11 +01001387{
1388 int rv;
1389
1390 D_ASSERT(current == mdev->worker.task);
1391
1392 /* open coded non-blocking drbd_suspend_io(mdev); */
1393 set_bit(SUSPEND_IO, &mdev->flags);
Lars Ellenberg19f843a2010-12-15 08:59:11 +01001394
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001395 drbd_bm_lock(mdev, why, flags);
Lars Ellenberg19f843a2010-12-15 08:59:11 +01001396 rv = io_fn(mdev);
1397 drbd_bm_unlock(mdev);
1398
1399 drbd_resume_io(mdev);
1400
1401 return rv;
1402}
1403
Philipp Reisnerb411b362009-09-25 16:07:19 -07001404/**
1405 * after_state_ch() - Perform after state change actions that may sleep
1406 * @mdev: DRBD device.
1407 * @os: old state.
1408 * @ns: new state.
1409 * @flags: Flags
1410 */
1411static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1412 union drbd_state ns, enum chg_state_flags flags)
1413{
1414 enum drbd_fencing_p fp;
Philipp Reisner67098932010-06-24 16:24:25 +02001415 enum drbd_req_event what = nothing;
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001416 union drbd_state nsm = (union drbd_state){ .i = -1 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07001417
1418 if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
1419 clear_bit(CRASHED_PRIMARY, &mdev->flags);
1420 if (mdev->p_uuid)
1421 mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
1422 }
1423
1424 fp = FP_DONT_CARE;
1425 if (get_ldev(mdev)) {
1426 fp = mdev->ldev->dc.fencing;
1427 put_ldev(mdev);
1428 }
1429
1430 /* Inform userspace about the change... */
1431 drbd_bcast_state(mdev, ns);
1432
1433 if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
1434 (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
1435 drbd_khelper(mdev, "pri-on-incon-degr");
1436
1437 /* Here we have the actions that are performed after a
1438 state change. This function might sleep */
1439
Philipp Reisnerdfa8bed2011-06-29 14:06:08 +02001440 if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING)
1441 mod_timer(&mdev->request_timer, jiffies + HZ);
1442
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001443 nsm.i = -1;
1444 if (ns.susp_nod) {
Philipp Reisner3f986882010-12-20 14:48:20 +01001445 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1446 what = resend;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001447
Philipp Reisner79f16f52011-07-15 18:44:26 +02001448 if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
1449 ns.disk > D_NEGOTIATING)
Philipp Reisner3f986882010-12-20 14:48:20 +01001450 what = restart_frozen_disk_io;
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001451
Philipp Reisner3f986882010-12-20 14:48:20 +01001452 if (what != nothing)
1453 nsm.susp_nod = 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001454 }
1455
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001456 if (ns.susp_fen) {
Philipp Reisner43a51822010-06-11 11:26:34 +02001457 /* case1: The outdate peer handler is successful: */
1458 if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001459 tl_clear(mdev);
Philipp Reisner43a51822010-06-11 11:26:34 +02001460 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
1461 drbd_uuid_new_current(mdev);
1462 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner43a51822010-06-11 11:26:34 +02001463 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464 spin_lock_irq(&mdev->req_lock);
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001465 _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466 spin_unlock_irq(&mdev->req_lock);
1467 }
Philipp Reisner43a51822010-06-11 11:26:34 +02001468 /* case2: The connection was established again: */
1469 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
1470 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner67098932010-06-24 16:24:25 +02001471 what = resend;
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001472 nsm.susp_fen = 0;
Philipp Reisner43a51822010-06-11 11:26:34 +02001473 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001474 }
Philipp Reisner67098932010-06-24 16:24:25 +02001475
1476 if (what != nothing) {
1477 spin_lock_irq(&mdev->req_lock);
1478 _tl_restart(mdev, what);
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001479 nsm.i &= mdev->state.i;
1480 _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
Philipp Reisner67098932010-06-24 16:24:25 +02001481 spin_unlock_irq(&mdev->req_lock);
1482 }
1483
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001484 /* Became sync source. With protocol >= 96, we still need to send out
1485 * the sync uuid now. Need to do that before any drbd_send_state, or
1486 * the other side may go "paused sync" before receiving the sync uuids,
1487 * which is unexpected. */
1488 if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
1489 (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
1490 mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
1491 drbd_gen_and_send_sync_uuid(mdev);
1492 put_ldev(mdev);
1493 }
1494
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495 /* Do not change the order of the if above and the two below... */
1496 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
1497 drbd_send_uuids(mdev);
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001498 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499 }
Lars Ellenberg54b956a2011-01-20 10:47:53 +01001500 /* No point in queuing send_bitmap if we don't have a connection
1501 * anymore, so check also the _current_ state, not only the new state
1502 * at the time this work was queued. */
1503 if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
1504 mdev->state.conn == C_WF_BITMAP_S)
1505 drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001506 "send_bitmap (WFBitMapS)",
1507 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001508
1509 /* Lost contact to peer's copy of the data */
1510 if ((os.pdsk >= D_INCONSISTENT &&
1511 os.pdsk != D_UNKNOWN &&
1512 os.pdsk != D_OUTDATED)
1513 && (ns.pdsk < D_INCONSISTENT ||
1514 ns.pdsk == D_UNKNOWN ||
1515 ns.pdsk == D_OUTDATED)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001516 if (get_ldev(mdev)) {
1517 if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
Philipp Reisner2c8d1962010-05-25 14:32:03 +02001518 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001519 if (is_susp(mdev->state)) {
Philipp Reisner43a51822010-06-11 11:26:34 +02001520 set_bit(NEW_CUR_UUID, &mdev->flags);
1521 } else {
1522 drbd_uuid_new_current(mdev);
1523 drbd_send_uuids(mdev);
1524 }
Philipp Reisner2c8d1962010-05-25 14:32:03 +02001525 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001526 put_ldev(mdev);
1527 }
1528 }
1529
1530 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
Philipp Reisnerbca482e2011-07-15 12:14:27 +02001531 if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
1532 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
Philipp Reisner2c8d1962010-05-25 14:32:03 +02001533 drbd_uuid_new_current(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02001534 drbd_send_uuids(mdev);
1535 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536 /* D_DISKLESS Peer becomes secondary */
1537 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001538 /* We may still be Primary ourselves.
1539 * No harm done if the bitmap still changes,
1540 * redirtied pages will follow later. */
1541 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1542 "demote diskless peer", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg19f843a2010-12-15 08:59:11 +01001543 put_ldev(mdev);
1544 }
1545
Lars Ellenberg06d33e92010-12-18 17:00:59 +01001546 /* Write out all changed bits on demote.
1547 * Though, no need to da that just yet
1548 * if there is a resync going on still */
1549 if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
1550 mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001551 /* No changes to the bitmap expected this time, so assert that,
1552 * even though no harm was done if it did change. */
1553 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1554 "demote", BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555 put_ldev(mdev);
1556 }
1557
1558 /* Last part of the attaching process ... */
1559 if (ns.conn >= C_CONNECTED &&
1560 os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01001561 drbd_send_sizes(mdev, 0, 0); /* to start sync... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001562 drbd_send_uuids(mdev);
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001563 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564 }
1565
1566 /* We want to pause/continue resync, tell peer. */
1567 if (ns.conn >= C_CONNECTED &&
1568 ((os.aftr_isp != ns.aftr_isp) ||
1569 (os.user_isp != ns.user_isp)))
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001570 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001571
1572 /* In case one of the isp bits got set, suspend other devices. */
1573 if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
1574 (ns.aftr_isp || ns.peer_isp || ns.user_isp))
1575 suspend_other_sg(mdev);
1576
1577 /* Make sure the peer gets informed about eventual state
1578 changes (ISP bits) while we were in WFReportParams. */
1579 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001580 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001581
Philipp Reisner67531712010-10-27 12:21:30 +02001582 if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001583 drbd_send_state(mdev, ns);
Philipp Reisner67531712010-10-27 12:21:30 +02001584
Philipp Reisnerb411b362009-09-25 16:07:19 -07001585 /* We are in the progress to start a full sync... */
1586 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
1587 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001588 /* no other bitmap changes expected during this phase */
1589 drbd_queue_bitmap_io(mdev,
1590 &drbd_bmio_set_n_write, &abw_start_sync,
1591 "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001592
1593 /* We are invalidating our self... */
1594 if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
1595 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001596 /* other bitmap operation expected during this phase */
1597 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
1598 "set_n_write from invalidate", BM_LOCKED_MASK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001599
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001600 /* first half of local IO error, failure to attach,
1601 * or administrative detach */
1602 if (os.disk != D_FAILED && ns.disk == D_FAILED) {
Philipp Reisner7caacb62011-12-14 18:01:21 +01001603 enum drbd_io_error_p eh = EP_PASS_ON;
1604 int was_io_error = 0;
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001605 /* corresponding get_ldev was in __drbd_set_state, to serialize
Philipp Reisner7caacb62011-12-14 18:01:21 +01001606 * our cleanup here with the transition to D_DISKLESS.
		 * But it is still not safe to dereference ldev here, since
		 * we might come from a failed Attach before ldev was set. */
1609 if (mdev->ldev) {
1610 eh = mdev->ldev->dc.on_io_error;
1611 was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001612
Philipp Reisner7caacb62011-12-14 18:01:21 +01001613 /* Immediately allow completion of all application IO, that waits
1614 for completion from the local disk. */
1615 tl_abort_disk_io(mdev);
Philipp Reisner2b4dd362011-03-14 13:01:50 +01001616
Philipp Reisner7caacb62011-12-14 18:01:21 +01001617 /* current state still has to be D_FAILED,
1618 * there is only one way out: to D_DISKLESS,
1619 * and that may only happen after our put_ldev below. */
1620 if (mdev->state.disk != D_FAILED)
1621 dev_err(DEV,
1622 "ASSERT FAILED: disk is %s during detach\n",
1623 drbd_disk_str(mdev->state.disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001624
Philipp Reisner7caacb62011-12-14 18:01:21 +01001625 if (ns.conn >= C_CONNECTED)
1626 drbd_send_state(mdev, ns);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001627
Philipp Reisner7caacb62011-12-14 18:01:21 +01001628 drbd_rs_cancel_all(mdev);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001629
Philipp Reisner7caacb62011-12-14 18:01:21 +01001630 /* In case we want to get something to stable storage still,
1631 * this may be the last chance.
1632 * Following put_ldev may transition to D_DISKLESS. */
1633 drbd_md_sync(mdev);
1634 }
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001635 put_ldev(mdev);
1636
1637 if (was_io_error && eh == EP_CALL_HELPER)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001638 drbd_khelper(mdev, "local-io-error");
1639 }
1640
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001641 /* second half of local IO error, failure to attach,
1642 * or administrative detach,
1643 * after local_cnt references have reached zero again */
1644 if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
1645 /* We must still be diskless,
1646 * re-attach has to be serialized with this! */
1647 if (mdev->state.disk != D_DISKLESS)
1648 dev_err(DEV,
1649 "ASSERT FAILED: disk is %s while going diskless\n",
1650 drbd_disk_str(mdev->state.disk));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001651
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001652 mdev->rs_total = 0;
1653 mdev->rs_failed = 0;
1654 atomic_set(&mdev->rs_pending_cnt, 0);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001655
Philipp Reisner4afc4332011-12-13 10:31:32 +01001656 if (ns.conn >= C_CONNECTED)
1657 drbd_send_state(mdev, ns);
1658
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001659 /* corresponding get_ldev in __drbd_set_state
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001660 * this may finally trigger drbd_ldev_destroy. */
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001661 put_ldev(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662 }
1663
	/* Notify peer that I had a local IO error, and did not detach. */
Philipp Reisner4afc4332011-12-13 10:31:32 +01001665 if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001666 drbd_send_state(mdev, ns);
Philipp Reisner738a84b2011-03-03 00:21:30 +01001667
Philipp Reisnerb411b362009-09-25 16:07:19 -07001668 /* Disks got bigger while they were detached */
1669 if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
1670 test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
1671 if (ns.conn == C_CONNECTED)
1672 resync_after_online_grow(mdev);
1673 }
1674
1675 /* A resync finished or aborted, wake paused devices... */
1676 if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
1677 (os.peer_isp && !ns.peer_isp) ||
1678 (os.user_isp && !ns.user_isp))
1679 resume_next_sg(mdev);
1680
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001681 /* sync target done with resync. Explicitly notify peer, even though
1682 * it should (at least for non-empty resyncs) already know itself. */
1683 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001684 drbd_send_state(mdev, ns);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001685
Philipp Reisner197296f2012-03-26 16:47:11 +02001686 /* Wake up role changes, that were delayed because of connection establishing */
1687 if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
1688 clear_bit(STATE_SENT, &mdev->flags);
1689 wake_up(&mdev->state_wait);
1690 }
1691
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001692 /* This triggers bitmap writeout of potentially still unwritten pages
1693 * if the resync finished cleanly, or aborted because of peer disk
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001694 * failure, or because of connection loss.
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001695 * For resync aborted because of local disk failure, we cannot do
1696 * any bitmap writeout anymore.
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001697 * No harm done if some bits change during this phase.
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001698 */
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001699 if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
Lars Ellenberg0e8488a2012-04-25 23:06:45 +02001700 drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL,
1701 "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001702 put_ldev(mdev);
1703 }
Lars Ellenberg02851e92010-12-16 14:47:39 +01001704
Philipp Reisnerf70b35112010-06-24 14:34:40 +02001705 /* free tl_hash if we Got thawed and are C_STANDALONE */
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001706 if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02001707 drbd_free_tl_hash(mdev);
1708
Philipp Reisnerb411b362009-09-25 16:07:19 -07001709 /* Upon network connection, we need to start the receiver */
1710 if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
1711 drbd_thread_start(&mdev->receiver);
1712
1713 /* Terminate worker thread if we are unconfigured - it will be
1714 restarted as needed... */
1715 if (ns.disk == D_DISKLESS &&
1716 ns.conn == C_STANDALONE &&
1717 ns.role == R_SECONDARY) {
1718 if (os.aftr_isp != ns.aftr_isp)
1719 resume_next_sg(mdev);
1720 /* set in __drbd_set_state, unless CONFIG_PENDING was set */
1721 if (test_bit(DEVICE_DYING, &mdev->flags))
1722 drbd_thread_stop_nowait(&mdev->worker);
1723 }
1724
1725 drbd_md_sync(mdev);
1726}
1727
1728
1729static int drbd_thread_setup(void *arg)
1730{
1731 struct drbd_thread *thi = (struct drbd_thread *) arg;
1732 struct drbd_conf *mdev = thi->mdev;
1733 unsigned long flags;
1734 int retval;
1735
1736restart:
1737 retval = thi->function(thi);
1738
1739 spin_lock_irqsave(&thi->t_lock, flags);
1740
1741 /* if the receiver has been "Exiting", the last thing it did
1742 * was set the conn state to "StandAlone",
1743 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
1744 * and receiver thread will be "started".
1745 * drbd_thread_start needs to set "Restarting" in that case.
1746 * t_state check and assignment needs to be within the same spinlock,
1747 * so either thread_start sees Exiting, and can remap to Restarting,
1748 * or thread_start see None, and can proceed as normal.
1749 */
1750
1751 if (thi->t_state == Restarting) {
1752 dev_info(DEV, "Restarting %s\n", current->comm);
1753 thi->t_state = Running;
1754 spin_unlock_irqrestore(&thi->t_lock, flags);
1755 goto restart;
1756 }
1757
1758 thi->task = NULL;
1759 thi->t_state = None;
1760 smp_mb();
1761 complete(&thi->stop);
1762 spin_unlock_irqrestore(&thi->t_lock, flags);
1763
1764 dev_info(DEV, "Terminating %s\n", current->comm);
1765
1766 /* Release mod reference taken when thread was started */
1767 module_put(THIS_MODULE);
1768 return retval;
1769}
1770
1771static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi,
1772 int (*func) (struct drbd_thread *))
1773{
1774 spin_lock_init(&thi->t_lock);
1775 thi->task = NULL;
1776 thi->t_state = None;
1777 thi->function = func;
1778 thi->mdev = mdev;
1779}
1780
1781int drbd_thread_start(struct drbd_thread *thi)
1782{
1783 struct drbd_conf *mdev = thi->mdev;
1784 struct task_struct *nt;
1785 unsigned long flags;
1786
1787 const char *me =
1788 thi == &mdev->receiver ? "receiver" :
1789 thi == &mdev->asender ? "asender" :
1790 thi == &mdev->worker ? "worker" : "NONSENSE";
1791
1792 /* is used from state engine doing drbd_thread_stop_nowait,
1793 * while holding the req lock irqsave */
1794 spin_lock_irqsave(&thi->t_lock, flags);
1795
1796 switch (thi->t_state) {
1797 case None:
1798 dev_info(DEV, "Starting %s thread (from %s [%d])\n",
1799 me, current->comm, current->pid);
1800
1801 /* Get ref on module for thread - this is released when thread exits */
1802 if (!try_module_get(THIS_MODULE)) {
1803 dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
1804 spin_unlock_irqrestore(&thi->t_lock, flags);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001805 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001806 }
1807
1808 init_completion(&thi->stop);
1809 D_ASSERT(thi->task == NULL);
1810 thi->reset_cpu_mask = 1;
1811 thi->t_state = Running;
1812 spin_unlock_irqrestore(&thi->t_lock, flags);
1813 flush_signals(current); /* otherw. may get -ERESTARTNOINTR */
1814
1815 nt = kthread_create(drbd_thread_setup, (void *) thi,
1816 "drbd%d_%s", mdev_to_minor(mdev), me);
1817
1818 if (IS_ERR(nt)) {
1819 dev_err(DEV, "Couldn't start thread\n");
1820
1821 module_put(THIS_MODULE);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001822 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001823 }
1824 spin_lock_irqsave(&thi->t_lock, flags);
1825 thi->task = nt;
1826 thi->t_state = Running;
1827 spin_unlock_irqrestore(&thi->t_lock, flags);
1828 wake_up_process(nt);
1829 break;
1830 case Exiting:
1831 thi->t_state = Restarting;
1832 dev_info(DEV, "Restarting %s thread (from %s [%d])\n",
1833 me, current->comm, current->pid);
1834 /* fall through */
1835 case Running:
1836 case Restarting:
1837 default:
1838 spin_unlock_irqrestore(&thi->t_lock, flags);
1839 break;
1840 }
1841
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001842 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001843}
1844
1845
1846void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
1847{
1848 unsigned long flags;
1849
1850 enum drbd_thread_state ns = restart ? Restarting : Exiting;
1851
1852 /* may be called from state engine, holding the req lock irqsave */
1853 spin_lock_irqsave(&thi->t_lock, flags);
1854
1855 if (thi->t_state == None) {
1856 spin_unlock_irqrestore(&thi->t_lock, flags);
1857 if (restart)
1858 drbd_thread_start(thi);
1859 return;
1860 }
1861
1862 if (thi->t_state != ns) {
1863 if (thi->task == NULL) {
1864 spin_unlock_irqrestore(&thi->t_lock, flags);
1865 return;
1866 }
1867
1868 thi->t_state = ns;
1869 smp_mb();
1870 init_completion(&thi->stop);
1871 if (thi->task != current)
1872 force_sig(DRBD_SIGKILL, thi->task);
1873
1874 }
1875
1876 spin_unlock_irqrestore(&thi->t_lock, flags);
1877
1878 if (wait)
1879 wait_for_completion(&thi->stop);
1880}
1881
1882#ifdef CONFIG_SMP
1883/**
1884 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
1885 * @mdev: DRBD device.
1886 *
1887 * Forces all threads of a device onto the same CPU. This is beneficial for
1888 * DRBD's performance. May be overwritten by user's configuration.
1889 */
1890void drbd_calc_cpu_mask(struct drbd_conf *mdev)
1891{
1892 int ord, cpu;
1893
1894 /* user override. */
1895 if (cpumask_weight(mdev->cpu_mask))
1896 return;
1897
1898 ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask);
1899 for_each_online_cpu(cpu) {
1900 if (ord-- == 0) {
1901 cpumask_set_cpu(cpu, mdev->cpu_mask);
1902 return;
1903 }
1904 }
1905 /* should not be reached */
1906 cpumask_setall(mdev->cpu_mask);
1907}
1908
1909/**
1910 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
1911 * @mdev: DRBD device.
1912 *
1913 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
1914 * prematurely.
1915 */
1916void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
1917{
1918 struct task_struct *p = current;
1919 struct drbd_thread *thi =
1920 p == mdev->asender.task ? &mdev->asender :
1921 p == mdev->receiver.task ? &mdev->receiver :
1922 p == mdev->worker.task ? &mdev->worker :
1923 NULL;
1924 ERR_IF(thi == NULL)
1925 return;
1926 if (!thi->reset_cpu_mask)
1927 return;
1928 thi->reset_cpu_mask = 0;
1929 set_cpus_allowed_ptr(p, mdev->cpu_mask);
1930}
1931#endif
1932
1933/* the appropriate socket mutex must be held already */
1934int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
Philipp Reisner0b70a132010-08-20 13:36:10 +02001935 enum drbd_packets cmd, struct p_header80 *h,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001936 size_t size, unsigned msg_flags)
1937{
1938 int sent, ok;
1939
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001940 ERR_IF(!h) return false;
1941 ERR_IF(!size) return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001942
1943 h->magic = BE_DRBD_MAGIC;
1944 h->command = cpu_to_be16(cmd);
Philipp Reisner0b70a132010-08-20 13:36:10 +02001945 h->length = cpu_to_be16(size-sizeof(struct p_header80));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001946
Philipp Reisnerb411b362009-09-25 16:07:19 -07001947 sent = drbd_send(mdev, sock, h, size, msg_flags);
1948
1949 ok = (sent == size);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001950 if (!ok && !signal_pending(current))
1951 dev_warn(DEV, "short sent %s size=%d sent=%d\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001952 cmdname(cmd), (int)size, sent);
1953 return ok;
1954}
1955
1956/* don't pass the socket. we may only look at it
1957 * when we hold the appropriate socket mutex.
1958 */
1959int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
Philipp Reisner0b70a132010-08-20 13:36:10 +02001960 enum drbd_packets cmd, struct p_header80 *h, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001961{
1962 int ok = 0;
1963 struct socket *sock;
1964
1965 if (use_data_socket) {
1966 mutex_lock(&mdev->data.mutex);
1967 sock = mdev->data.socket;
1968 } else {
1969 mutex_lock(&mdev->meta.mutex);
1970 sock = mdev->meta.socket;
1971 }
1972
1973 /* drbd_disconnect() could have called drbd_free_sock()
1974 * while we were waiting in down()... */
1975 if (likely(sock != NULL))
1976 ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
1977
1978 if (use_data_socket)
1979 mutex_unlock(&mdev->data.mutex);
1980 else
1981 mutex_unlock(&mdev->meta.mutex);
1982 return ok;
1983}
1984
1985int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
1986 size_t size)
1987{
Philipp Reisner0b70a132010-08-20 13:36:10 +02001988 struct p_header80 h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001989 int ok;
1990
1991 h.magic = BE_DRBD_MAGIC;
1992 h.command = cpu_to_be16(cmd);
1993 h.length = cpu_to_be16(size);
1994
1995 if (!drbd_get_data_sock(mdev))
1996 return 0;
1997
Philipp Reisnerb411b362009-09-25 16:07:19 -07001998 ok = (sizeof(h) ==
1999 drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0));
2000 ok = ok && (size ==
2001 drbd_send(mdev, mdev->data.socket, data, size, 0));
2002
2003 drbd_put_data_sock(mdev);
2004
2005 return ok;
2006}
2007
2008int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
2009{
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002010 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002011 struct socket *sock;
2012 int size, rv;
2013 const int apv = mdev->agreed_pro_version;
2014
2015 size = apv <= 87 ? sizeof(struct p_rs_param)
2016 : apv == 88 ? sizeof(struct p_rs_param)
2017 + strlen(mdev->sync_conf.verify_alg) + 1
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002018 : apv <= 94 ? sizeof(struct p_rs_param_89)
2019 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002020
2021 /* used from admin command context and receiver/worker context.
2022 * to avoid kmalloc, grab the socket right here,
2023 * then use the pre-allocated sbuf there */
2024 mutex_lock(&mdev->data.mutex);
2025 sock = mdev->data.socket;
2026
2027 if (likely(sock != NULL)) {
2028 enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
2029
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002030 p = &mdev->data.sbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002031
2032 /* initialize verify_alg and csums_alg */
2033 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2034
2035 p->rate = cpu_to_be32(sc->rate);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002036 p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead);
2037 p->c_delay_target = cpu_to_be32(sc->c_delay_target);
2038 p->c_fill_target = cpu_to_be32(sc->c_fill_target);
2039 p->c_max_rate = cpu_to_be32(sc->c_max_rate);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002040
2041 if (apv >= 88)
2042 strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
2043 if (apv >= 89)
2044 strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
2045
2046 rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
2047 } else
2048 rv = 0; /* not ok */
2049
2050 mutex_unlock(&mdev->data.mutex);
2051
2052 return rv;
2053}
2054
2055int drbd_send_protocol(struct drbd_conf *mdev)
2056{
2057 struct p_protocol *p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002058 int size, cf, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002059
2060 size = sizeof(struct p_protocol);
2061
2062 if (mdev->agreed_pro_version >= 87)
2063 size += strlen(mdev->net_conf->integrity_alg) + 1;
2064
2065 /* we must not recurse into our own queue,
2066 * as that is blocked during handshake */
2067 p = kmalloc(size, GFP_NOIO);
2068 if (p == NULL)
2069 return 0;
2070
2071 p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol);
2072 p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p);
2073 p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p);
2074 p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002075 p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
2076
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002077 cf = 0;
2078 if (mdev->net_conf->want_lose)
2079 cf |= CF_WANT_LOSE;
2080 if (mdev->net_conf->dry_run) {
2081 if (mdev->agreed_pro_version >= 92)
2082 cf |= CF_DRY_RUN;
2083 else {
2084 dev_err(DEV, "--dry-run is not supported by peer");
Dan Carpenter7ac314c2010-04-22 14:27:23 +02002085 kfree(p);
Philipp Reisner148efa12011-01-15 00:21:15 +01002086 return -1;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002087 }
2088 }
2089 p->conn_flags = cpu_to_be32(cf);
2090
Philipp Reisnerb411b362009-09-25 16:07:19 -07002091 if (mdev->agreed_pro_version >= 87)
2092 strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
2093
2094 rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002095 (struct p_header80 *)p, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002096 kfree(p);
2097 return rv;
2098}
2099
2100int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
2101{
2102 struct p_uuids p;
2103 int i;
2104
2105 if (!get_ldev_if_state(mdev, D_NEGOTIATING))
2106 return 1;
2107
2108 for (i = UI_CURRENT; i < UI_SIZE; i++)
2109 p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
2110
2111 mdev->comm_bm_set = drbd_bm_total_weight(mdev);
2112 p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
2113 uuid_flags |= mdev->net_conf->want_lose ? 1 : 0;
2114 uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
2115 uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
2116 p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);
2117
2118 put_ldev(mdev);
2119
2120 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002121 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122}
2123
2124int drbd_send_uuids(struct drbd_conf *mdev)
2125{
2126 return _drbd_send_uuids(mdev, 0);
2127}
2128
2129int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
2130{
2131 return _drbd_send_uuids(mdev, 8);
2132}
2133
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002134void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
2135{
2136 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
2137 u64 *uuid = mdev->ldev->md.uuid;
2138 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
2139 text,
2140 (unsigned long long)uuid[UI_CURRENT],
2141 (unsigned long long)uuid[UI_BITMAP],
2142 (unsigned long long)uuid[UI_HISTORY_START],
2143 (unsigned long long)uuid[UI_HISTORY_END]);
2144 put_ldev(mdev);
2145 } else {
2146 dev_info(DEV, "%s effective data uuid: %016llX\n",
2147 text,
2148 (unsigned long long)mdev->ed_uuid);
2149 }
2150}
2151
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002152int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002153{
2154 struct p_rs_uuid p;
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002155 u64 uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002156
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002157 D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
2158
Philipp Reisner5ba3dac2011-10-05 15:54:18 +02002159 uuid = mdev->ldev->md.uuid[UI_BITMAP];
2160 if (uuid && uuid != UUID_JUST_CREATED)
2161 uuid = uuid + UUID_NEW_BM_OFFSET;
2162 else
2163 get_random_bytes(&uuid, sizeof(u64));
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002164 drbd_uuid_set(mdev, UI_BITMAP, uuid);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002165 drbd_print_uuids(mdev, "updated sync UUID");
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002166 drbd_md_sync(mdev);
2167 p.uuid = cpu_to_be64(uuid);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002168
2169 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002170 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002171}
2172
Philipp Reisnere89b5912010-03-24 17:11:33 +01002173int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002174{
2175 struct p_sizes p;
2176 sector_t d_size, u_size;
Philipp Reisner99432fc2011-05-20 16:39:13 +02002177 int q_order_type, max_bio_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002178 int ok;
2179
2180 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
2181 D_ASSERT(mdev->ldev->backing_bdev);
2182 d_size = drbd_get_max_capacity(mdev->ldev);
2183 u_size = mdev->ldev->dc.disk_size;
2184 q_order_type = drbd_queue_order_type(mdev);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002185 max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
2186 max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002187 put_ldev(mdev);
2188 } else {
2189 d_size = 0;
2190 u_size = 0;
2191 q_order_type = QUEUE_ORDERED_NONE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02002192 max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002193 }
2194
Philipp Reisner68093842011-06-30 15:43:06 +02002195 /* Never allow old drbd (up to 8.3.7) to see more than 32KiB */
2196 if (mdev->agreed_pro_version <= 94)
2197 max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
2198
Philipp Reisnerb411b362009-09-25 16:07:19 -07002199 p.d_size = cpu_to_be64(d_size);
2200 p.u_size = cpu_to_be64(u_size);
2201 p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
Philipp Reisner99432fc2011-05-20 16:39:13 +02002202 p.max_bio_size = cpu_to_be32(max_bio_size);
Philipp Reisnere89b5912010-03-24 17:11:33 +01002203 p.queue_order_type = cpu_to_be16(q_order_type);
2204 p.dds_flags = cpu_to_be16(flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205
2206 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002207 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002208 return ok;
2209}
2210
2211/**
Lars Ellenbergf479ea02011-10-27 16:52:30 +02002212 * drbd_send_current_state() - Sends the drbd state to the peer
Philipp Reisnerb411b362009-09-25 16:07:19 -07002213 * @mdev: DRBD device.
2214 */
Lars Ellenbergf479ea02011-10-27 16:52:30 +02002215int drbd_send_current_state(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002216{
2217 struct socket *sock;
2218 struct p_state p;
2219 int ok = 0;
2220
2221 /* Grab state lock so we wont send state if we're in the middle
2222 * of a cluster wide state change on another thread */
2223 drbd_state_lock(mdev);
2224
2225 mutex_lock(&mdev->data.mutex);
2226
2227 p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
2228 sock = mdev->data.socket;
2229
2230 if (likely(sock != NULL)) {
2231 ok = _drbd_send_cmd(mdev, sock, P_STATE,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002232 (struct p_header80 *)&p, sizeof(p), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002233 }
2234
2235 mutex_unlock(&mdev->data.mutex);
2236
2237 drbd_state_unlock(mdev);
2238 return ok;
2239}
2240
Lars Ellenbergf479ea02011-10-27 16:52:30 +02002241/**
2242 * drbd_send_state() - After a state change, sends the new state to the peer
2243 * @mdev: DRBD device.
2244 * @state: the state to send, not necessarily the current state.
2245 *
2246 * Each state change queues an "after_state_ch" work, which will eventually
2247 * send the resulting new state to the peer. If more state changes happen
2248 * between queuing and processing of the after_state_ch work, we still
2249 * want to send each intermediary state in the order it occurred.
2250 */
2251int drbd_send_state(struct drbd_conf *mdev, union drbd_state state)
2252{
2253 struct socket *sock;
2254 struct p_state p;
2255 int ok = 0;
2256
2257 mutex_lock(&mdev->data.mutex);
2258
2259 p.state = cpu_to_be32(state.i);
2260 sock = mdev->data.socket;
2261
2262 if (likely(sock != NULL)) {
2263 ok = _drbd_send_cmd(mdev, sock, P_STATE,
2264 (struct p_header80 *)&p, sizeof(p), 0);
2265 }
2266
2267 mutex_unlock(&mdev->data.mutex);
2268
2269 return ok;
2270}
2271
Philipp Reisnerb411b362009-09-25 16:07:19 -07002272int drbd_send_state_req(struct drbd_conf *mdev,
2273 union drbd_state mask, union drbd_state val)
2274{
2275 struct p_req_state p;
2276
2277 p.mask = cpu_to_be32(mask.i);
2278 p.val = cpu_to_be32(val.i);
2279
2280 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002281 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002282}
2283
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01002284int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002285{
2286 struct p_req_state_reply p;
2287
2288 p.retcode = cpu_to_be32(retcode);
2289
2290 return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002291 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002292}
2293
2294int fill_bitmap_rle_bits(struct drbd_conf *mdev,
2295 struct p_compressed_bm *p,
2296 struct bm_xfer_ctx *c)
2297{
2298 struct bitstream bs;
2299 unsigned long plain_bits;
2300 unsigned long tmp;
2301 unsigned long rl;
2302 unsigned len;
2303 unsigned toggle;
2304 int bits;
2305
2306 /* may we use this feature? */
2307 if ((mdev->sync_conf.use_rle == 0) ||
2308 (mdev->agreed_pro_version < 90))
2309 return 0;
2310
2311 if (c->bit_offset >= c->bm_bits)
2312 return 0; /* nothing to do. */
2313
2314 /* use at most thus many bytes */
2315 bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
2316 memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
2317 /* plain bits covered in this code string */
2318 plain_bits = 0;
2319
2320 /* p->encoding & 0x80 stores whether the first run length is set.
2321 * bit offset is implicit.
2322 * start with toggle == 2 to be able to tell the first iteration */
2323 toggle = 2;
2324
2325 /* see how much plain bits we can stuff into one packet
2326 * using RLE and VLI. */
2327 do {
2328 tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
2329 : _drbd_bm_find_next(mdev, c->bit_offset);
2330 if (tmp == -1UL)
2331 tmp = c->bm_bits;
2332 rl = tmp - c->bit_offset;
2333
2334 if (toggle == 2) { /* first iteration */
2335 if (rl == 0) {
2336 /* the first checked bit was set,
2337 * store start value, */
2338 DCBP_set_start(p, 1);
2339 /* but skip encoding of zero run length */
2340 toggle = !toggle;
2341 continue;
2342 }
2343 DCBP_set_start(p, 0);
2344 }
2345
2346 /* paranoia: catch zero runlength.
2347 * can only happen if bitmap is modified while we scan it. */
2348 if (rl == 0) {
2349 dev_err(DEV, "unexpected zero runlength while encoding bitmap "
2350 "t:%u bo:%lu\n", toggle, c->bit_offset);
2351 return -1;
2352 }
2353
2354 bits = vli_encode_bits(&bs, rl);
2355 if (bits == -ENOBUFS) /* buffer full */
2356 break;
2357 if (bits <= 0) {
2358 dev_err(DEV, "error while encoding bitmap: %d\n", bits);
2359 return 0;
2360 }
2361
2362 toggle = !toggle;
2363 plain_bits += rl;
2364 c->bit_offset = tmp;
2365 } while (c->bit_offset < c->bm_bits);
2366
2367 len = bs.cur.b - p->code + !!bs.cur.bit;
2368
2369 if (plain_bits < (len << 3)) {
2370 /* incompressible with this method.
2371 * we need to rewind both word and bit position. */
2372 c->bit_offset -= plain_bits;
2373 bm_xfer_ctx_bit_to_word_offset(c);
2374 c->bit_offset = c->word_offset * BITS_PER_LONG;
2375 return 0;
2376 }
2377
2378 /* RLE + VLI was able to compress it just fine.
2379 * update c->word_offset. */
2380 bm_xfer_ctx_bit_to_word_offset(c);
2381
2382 /* store pad_bits */
2383 DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
2384
2385 return len;
2386}
2387
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002388/**
2389 * send_bitmap_rle_or_plain
2390 *
2391 * Return 0 when done, 1 when another iteration is needed, and a negative error
2392 * code upon failure.
2393 */
2394static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07002395send_bitmap_rle_or_plain(struct drbd_conf *mdev,
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002396 struct p_header80 *h, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002397{
2398 struct p_compressed_bm *p = (void*)h;
2399 unsigned long num_words;
2400 int len;
2401 int ok;
2402
2403 len = fill_bitmap_rle_bits(mdev, p, c);
2404
2405 if (len < 0)
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002406 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002407
2408 if (len) {
2409 DCBP_set_code(p, RLE_VLI_Bits);
2410 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h,
2411 sizeof(*p) + len, 0);
2412
2413 c->packets[0]++;
2414 c->bytes[0] += sizeof(*p) + len;
2415
2416 if (c->bit_offset >= c->bm_bits)
2417 len = 0; /* DONE */
2418 } else {
2419 /* was not compressible.
2420 * send a buffer full of plain text bits instead. */
2421 num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
2422 len = num_words * sizeof(long);
2423 if (len)
2424 drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
2425 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002426 h, sizeof(struct p_header80) + len, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002427 c->word_offset += num_words;
2428 c->bit_offset = c->word_offset * BITS_PER_LONG;
2429
2430 c->packets[1]++;
Philipp Reisner0b70a132010-08-20 13:36:10 +02002431 c->bytes[1] += sizeof(struct p_header80) + len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002432
2433 if (c->bit_offset > c->bm_bits)
2434 c->bit_offset = c->bm_bits;
2435 }
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002436 if (ok) {
2437 if (len == 0) {
2438 INFO_bm_xfer_stats(mdev, "send", c);
2439 return 0;
2440 } else
2441 return 1;
2442 }
2443 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002444}
2445
2446/* See the comment at receive_bitmap() */
2447int _drbd_send_bitmap(struct drbd_conf *mdev)
2448{
2449 struct bm_xfer_ctx c;
Philipp Reisner0b70a132010-08-20 13:36:10 +02002450 struct p_header80 *p;
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002451 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002452
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002453 ERR_IF(!mdev->bitmap) return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002454
2455 /* maybe we should use some per thread scratch page,
2456 * and allocate that during initial device creation? */
Philipp Reisner0b70a132010-08-20 13:36:10 +02002457 p = (struct p_header80 *) __get_free_page(GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002458 if (!p) {
2459 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002460 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002461 }
2462
2463 if (get_ldev(mdev)) {
2464 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
2465 dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
2466 drbd_bm_set_all(mdev);
2467 if (drbd_bm_write(mdev)) {
2468 /* write_bm did fail! Leave full sync flag set in Meta P_DATA
2469 * but otherwise process as per normal - need to tell other
2470 * side that a full resync is required! */
2471 dev_err(DEV, "Failed to write bitmap to disk!\n");
2472 } else {
2473 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
2474 drbd_md_sync(mdev);
2475 }
2476 }
2477 put_ldev(mdev);
2478 }
2479
2480 c = (struct bm_xfer_ctx) {
2481 .bm_bits = drbd_bm_bits(mdev),
2482 .bm_words = drbd_bm_words(mdev),
2483 };
2484
2485 do {
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002486 err = send_bitmap_rle_or_plain(mdev, p, &c);
2487 } while (err > 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002488
2489 free_page((unsigned long) p);
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002490 return err == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002491}
2492
/* Wrapper around _drbd_send_bitmap() that takes the data socket.
 * Returns -1 if the data socket could not be obtained, 0 if the bitmap
 * was sent, and 1 if sending failed. */
int drbd_send_bitmap(struct drbd_conf *mdev)
{
	int failed;

	if (!drbd_get_data_sock(mdev))
		return -1;

	failed = !_drbd_send_bitmap(mdev);
	drbd_put_data_sock(mdev);

	return failed;
}
2503
2504int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
2505{
2506 int ok;
2507 struct p_barrier_ack p;
2508
2509 p.barrier = barrier_nr;
2510 p.set_size = cpu_to_be32(set_size);
2511
2512 if (mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002513 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002514 ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002515 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002516 return ok;
2517}
2518
2519/**
2520 * _drbd_send_ack() - Sends an ack packet
2521 * @mdev: DRBD device.
2522 * @cmd: Packet command code.
2523 * @sector: sector, needs to be in big endian byte order
2524 * @blksize: size in byte, needs to be in big endian byte order
2525 * @block_id: Id, big endian byte order
2526 */
2527static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
2528 u64 sector,
2529 u32 blksize,
2530 u64 block_id)
2531{
2532 int ok;
2533 struct p_block_ack p;
2534
2535 p.sector = sector;
2536 p.block_id = block_id;
2537 p.blksize = blksize;
2538 p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
2539
2540 if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002541 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002542 ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002543 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002544 return ok;
2545}
2546
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02002547/* dp->sector and dp->block_id already/still in network byte order,
2548 * data_size is payload size according to dp->head,
2549 * and may need to be corrected for digest size. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002550int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02002551 struct p_data *dp, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002552{
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02002553 data_size -= (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
2554 crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002555 return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
2556 dp->block_id);
2557}
2558
2559int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
2560 struct p_block_req *rp)
2561{
2562 return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
2563}
2564
2565/**
2566 * drbd_send_ack() - Sends an ack packet
2567 * @mdev: DRBD device.
2568 * @cmd: Packet command code.
2569 * @e: Epoch entry.
2570 */
2571int drbd_send_ack(struct drbd_conf *mdev,
2572 enum drbd_packets cmd, struct drbd_epoch_entry *e)
2573{
2574 return _drbd_send_ack(mdev, cmd,
2575 cpu_to_be64(e->sector),
2576 cpu_to_be32(e->size),
2577 e->block_id);
2578}
2579
2580/* This function misuses the block_id field to signal if the blocks
2581 * are is sync or not. */
2582int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
2583 sector_t sector, int blksize, u64 block_id)
2584{
2585 return _drbd_send_ack(mdev, cmd,
2586 cpu_to_be64(sector),
2587 cpu_to_be32(blksize),
2588 cpu_to_be64(block_id));
2589}
2590
2591int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
2592 sector_t sector, int size, u64 block_id)
2593{
2594 int ok;
2595 struct p_block_req p;
2596
2597 p.sector = cpu_to_be64(sector);
2598 p.block_id = block_id;
2599 p.blksize = cpu_to_be32(size);
2600
2601 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002602 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002603 return ok;
2604}
2605
2606int drbd_send_drequest_csum(struct drbd_conf *mdev,
2607 sector_t sector, int size,
2608 void *digest, int digest_size,
2609 enum drbd_packets cmd)
2610{
2611 int ok;
2612 struct p_block_req p;
2613
2614 p.sector = cpu_to_be64(sector);
2615 p.block_id = BE_DRBD_MAGIC + 0xbeef;
2616 p.blksize = cpu_to_be32(size);
2617
2618 p.head.magic = BE_DRBD_MAGIC;
2619 p.head.command = cpu_to_be16(cmd);
Philipp Reisner0b70a132010-08-20 13:36:10 +02002620 p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002621
2622 mutex_lock(&mdev->data.mutex);
2623
2624 ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0));
2625 ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0));
2626
2627 mutex_unlock(&mdev->data.mutex);
2628
2629 return ok;
2630}
2631
2632int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
2633{
2634 int ok;
2635 struct p_block_req p;
2636
2637 p.sector = cpu_to_be64(sector);
2638 p.block_id = BE_DRBD_MAGIC + 0xbabe;
2639 p.blksize = cpu_to_be32(size);
2640
2641 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002642 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002643 return ok;
2644}
2645
2646/* called on sndtimeo
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002647 * returns false if we should retry,
2648 * true if we think connection is dead
Philipp Reisnerb411b362009-09-25 16:07:19 -07002649 */
2650static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
2651{
2652 int drop_it;
2653 /* long elapsed = (long)(jiffies - mdev->last_received); */
2654
2655 drop_it = mdev->meta.socket == sock
2656 || !mdev->asender.task
2657 || get_t_state(&mdev->asender) != Running
2658 || mdev->state.conn < C_CONNECTED;
2659
2660 if (drop_it)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002661 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002662
2663 drop_it = !--mdev->ko_count;
2664 if (!drop_it) {
2665 dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
2666 current->comm, current->pid, mdev->ko_count);
2667 request_ping(mdev);
2668 }
2669
2670 return drop_it; /* && (mdev->state == R_PRIMARY) */;
2671}
2672
2673/* The idea of sendpage seems to be to put some kind of reference
2674 * to the page into the skb, and to hand it over to the NIC. In
2675 * this process get_page() gets called.
2676 *
2677 * As soon as the page was really sent over the network put_page()
2678 * gets called by some part of the network layer. [ NIC driver? ]
2679 *
2680 * [ get_page() / put_page() increment/decrement the count. If count
2681 * reaches 0 the page will be freed. ]
2682 *
2683 * This works nicely with pages from FSs.
2684 * But this means that in protocol A we might signal IO completion too early!
2685 *
2686 * In order not to corrupt data during a resync we must make sure
2687 * that we do not reuse our own buffer pages (EEs) to early, therefore
2688 * we have the net_ee list.
2689 *
2690 * XFS seems to have problems, still, it submits pages with page_count == 0!
2691 * As a workaround, we disable sendpage on pages
2692 * with page_count == 0 or PageSlab.
2693 */
2694static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002695 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002696{
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002697 int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002698 kunmap(page);
2699 if (sent == size)
2700 mdev->send_cnt += size>>9;
2701 return sent == size;
2702}
2703
2704static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002705 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002706{
2707 mm_segment_t oldfs = get_fs();
2708 int sent, ok;
2709 int len = size;
2710
2711 /* e.g. XFS meta- & log-data is in slab pages, which have a
2712 * page_count of 0 and/or have PageSlab() set.
2713 * we cannot use send_page for those, as that does get_page();
2714 * put_page(); and would cause either a VM_BUG directly, or
2715 * __page_cache_release a page that would actually still be referenced
2716 * by someone, leading to some obscure delayed Oops somewhere else. */
2717 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002718 return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002719
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002720 msg_flags |= MSG_NOSIGNAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002721 drbd_update_congested(mdev);
2722 set_fs(KERNEL_DS);
2723 do {
2724 sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
2725 offset, len,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002726 msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002727 if (sent == -EAGAIN) {
2728 if (we_should_drop_the_connection(mdev,
2729 mdev->data.socket))
2730 break;
2731 else
2732 continue;
2733 }
2734 if (sent <= 0) {
2735 dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
2736 __func__, (int)size, len, sent);
2737 break;
2738 }
2739 len -= sent;
2740 offset += sent;
2741 } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
2742 set_fs(oldfs);
2743 clear_bit(NET_CONGESTED, &mdev->flags);
2744
2745 ok = (len == 0);
2746 if (likely(ok))
2747 mdev->send_cnt += size>>9;
2748 return ok;
2749}
2750
2751static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
2752{
2753 struct bio_vec *bvec;
2754 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002755 /* hint all but last page with MSG_MORE */
Lars Ellenberg001a8862012-03-08 16:43:45 +01002756 bio_for_each_segment(bvec, bio, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002757 if (!_drbd_no_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002758 bvec->bv_offset, bvec->bv_len,
2759 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002760 return 0;
2761 }
2762 return 1;
2763}
2764
2765static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
2766{
2767 struct bio_vec *bvec;
2768 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002769 /* hint all but last page with MSG_MORE */
Lars Ellenberg001a8862012-03-08 16:43:45 +01002770 bio_for_each_segment(bvec, bio, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002771 if (!_drbd_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002772 bvec->bv_offset, bvec->bv_len,
2773 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002774 return 0;
2775 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776 return 1;
2777}
2778
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002779static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
2780{
2781 struct page *page = e->pages;
2782 unsigned len = e->size;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002783 /* hint all but last page with MSG_MORE */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002784 page_chain_for_each(page) {
2785 unsigned l = min_t(unsigned, len, PAGE_SIZE);
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002786 if (!_drbd_send_page(mdev, page, 0, l,
2787 page_chain_next(page) ? MSG_MORE : 0))
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002788 return 0;
2789 len -= l;
2790 }
2791 return 1;
2792}
2793
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002794static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
2795{
2796 if (mdev->agreed_pro_version >= 95)
2797 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002798 (bi_rw & REQ_FUA ? DP_FUA : 0) |
2799 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
2800 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
2801 else
Jens Axboe721a9602011-03-09 11:56:30 +01002802 return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002803}
2804
Philipp Reisnerb411b362009-09-25 16:07:19 -07002805/* Used to send write requests
2806 * R_PRIMARY -> Peer (P_DATA)
2807 */
2808int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2809{
2810 int ok = 1;
2811 struct p_data p;
2812 unsigned int dp_flags = 0;
2813 void *dgb;
2814 int dgs;
2815
2816 if (!drbd_get_data_sock(mdev))
2817 return 0;
2818
2819 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2820 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2821
Philipp Reisnerd5373382010-08-23 15:18:33 +02002822 if (req->size <= DRBD_MAX_SIZE_H80_PACKET) {
Philipp Reisner0b70a132010-08-20 13:36:10 +02002823 p.head.h80.magic = BE_DRBD_MAGIC;
2824 p.head.h80.command = cpu_to_be16(P_DATA);
2825 p.head.h80.length =
2826 cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2827 } else {
2828 p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2829 p.head.h95.command = cpu_to_be16(P_DATA);
2830 p.head.h95.length =
2831 cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2832 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002833
2834 p.sector = cpu_to_be64(req->sector);
2835 p.block_id = (unsigned long)req;
Lars Ellenberg671a74e2012-03-08 11:45:57 +01002836 p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002837
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002838 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
2839
Philipp Reisnerb411b362009-09-25 16:07:19 -07002840 if (mdev->state.conn >= C_SYNC_SOURCE &&
2841 mdev->state.conn <= C_PAUSED_SYNC_T)
2842 dp_flags |= DP_MAY_SET_IN_SYNC;
2843
2844 p.dp_flags = cpu_to_be32(dp_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002845 set_bit(UNPLUG_REMOTE, &mdev->flags);
2846 ok = (sizeof(p) ==
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002847 drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002848 if (ok && dgs) {
2849 dgb = mdev->int_dig_out;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002850 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
Andreas Gruenbachercab2f742010-12-09 16:08:46 +01002851 ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002852 }
2853 if (ok) {
Lars Ellenberg470be442010-11-10 10:36:52 +01002854 /* For protocol A, we have to memcpy the payload into
2855 * socket buffers, as we may complete right away
2856 * as soon as we handed it over to tcp, at which point the data
2857 * pages may become invalid.
2858 *
2859 * For data-integrity enabled, we copy it as well, so we can be
2860 * sure that even if the bio pages may still be modified, it
2861 * won't change the data on the wire, thus if the digest checks
2862 * out ok after sending on this side, but does not fit on the
2863 * receiving side, we sure have detected corruption elsewhere.
2864 */
2865 if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002866 ok = _drbd_send_bio(mdev, req->master_bio);
2867 else
2868 ok = _drbd_send_zc_bio(mdev, req->master_bio);
Lars Ellenberg470be442010-11-10 10:36:52 +01002869
2870 /* double check digest, sometimes buffers have been modified in flight. */
2871 if (dgs > 0 && dgs <= 64) {
Bart Van Assche24c48302011-05-21 18:32:29 +02002872 /* 64 byte, 512 bit, is the largest digest size
Lars Ellenberg470be442010-11-10 10:36:52 +01002873 * currently supported in kernel crypto. */
2874 unsigned char digest[64];
2875 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
2876 if (memcmp(mdev->int_dig_out, digest, dgs)) {
2877 dev_warn(DEV,
2878 "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
2879 (unsigned long long)req->sector, req->size);
2880 }
2881 } /* else if (dgs > 64) {
2882 ... Be noisy about digest too large ...
2883 } */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884 }
2885
2886 drbd_put_data_sock(mdev);
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02002887
Philipp Reisnerb411b362009-09-25 16:07:19 -07002888 return ok;
2889}
2890
2891/* answer packet, used to send data back for read requests:
2892 * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
2893 * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
2894 */
2895int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2896 struct drbd_epoch_entry *e)
2897{
2898 int ok;
2899 struct p_data p;
2900 void *dgb;
2901 int dgs;
2902
2903 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2904 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2905
Philipp Reisnerd5373382010-08-23 15:18:33 +02002906 if (e->size <= DRBD_MAX_SIZE_H80_PACKET) {
Philipp Reisner0b70a132010-08-20 13:36:10 +02002907 p.head.h80.magic = BE_DRBD_MAGIC;
2908 p.head.h80.command = cpu_to_be16(cmd);
2909 p.head.h80.length =
2910 cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2911 } else {
2912 p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2913 p.head.h95.command = cpu_to_be16(cmd);
2914 p.head.h95.length =
2915 cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2916 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002917
2918 p.sector = cpu_to_be64(e->sector);
2919 p.block_id = e->block_id;
2920 /* p.seq_num = 0; No sequence numbers here.. */
2921
2922 /* Only called by our kernel thread.
2923 * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
2924 * in response to admin command or module unload.
2925 */
2926 if (!drbd_get_data_sock(mdev))
2927 return 0;
2928
Philipp Reisner0b70a132010-08-20 13:36:10 +02002929 ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002930 if (ok && dgs) {
2931 dgb = mdev->int_dig_out;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002932 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
Andreas Gruenbachercab2f742010-12-09 16:08:46 +01002933 ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002934 }
2935 if (ok)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002936 ok = _drbd_send_zc_ee(mdev, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002937
2938 drbd_put_data_sock(mdev);
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02002939
Philipp Reisnerb411b362009-09-25 16:07:19 -07002940 return ok;
2941}
2942
Philipp Reisner73a01a12010-10-27 14:33:00 +02002943int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
2944{
2945 struct p_block_desc p;
2946
2947 p.sector = cpu_to_be64(req->sector);
2948 p.blksize = cpu_to_be32(req->size);
2949
2950 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
2951}
2952
Philipp Reisnerb411b362009-09-25 16:07:19 -07002953/*
2954 drbd_send distinguishes two cases:
2955
2956 Packets sent via the data socket "sock"
2957 and packets sent via the meta data socket "msock"
2958
2959 sock msock
2960 -----------------+-------------------------+------------------------------
2961 timeout conf.timeout / 2 conf.timeout / 2
2962 timeout action send a ping via msock Abort communication
2963 and close all sockets
2964*/
2965
2966/*
2967 * you must have down()ed the appropriate [m]sock_mutex elsewhere!
2968 */
2969int drbd_send(struct drbd_conf *mdev, struct socket *sock,
2970 void *buf, size_t size, unsigned msg_flags)
2971{
2972 struct kvec iov;
2973 struct msghdr msg;
2974 int rv, sent = 0;
2975
2976 if (!sock)
2977 return -1000;
2978
2979 /* THINK if (signal_pending) return ... ? */
2980
2981 iov.iov_base = buf;
2982 iov.iov_len = size;
2983
2984 msg.msg_name = NULL;
2985 msg.msg_namelen = 0;
2986 msg.msg_control = NULL;
2987 msg.msg_controllen = 0;
2988 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
2989
2990 if (sock == mdev->data.socket) {
2991 mdev->ko_count = mdev->net_conf->ko_count;
2992 drbd_update_congested(mdev);
2993 }
2994 do {
2995 /* STRANGE
2996 * tcp_sendmsg does _not_ use its size parameter at all ?
2997 *
2998 * -EAGAIN on timeout, -EINTR on signal.
2999 */
3000/* THINK
3001 * do we need to block DRBD_SIG if sock == &meta.socket ??
3002 * otherwise wake_asender() might interrupt some send_*Ack !
3003 */
3004 rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
3005 if (rv == -EAGAIN) {
3006 if (we_should_drop_the_connection(mdev, sock))
3007 break;
3008 else
3009 continue;
3010 }
3011 D_ASSERT(rv != 0);
3012 if (rv == -EINTR) {
3013 flush_signals(current);
3014 rv = 0;
3015 }
3016 if (rv < 0)
3017 break;
3018 sent += rv;
3019 iov.iov_base += rv;
3020 iov.iov_len -= rv;
3021 } while (sent < size);
3022
3023 if (sock == mdev->data.socket)
3024 clear_bit(NET_CONGESTED, &mdev->flags);
3025
3026 if (rv <= 0) {
3027 if (rv != -EAGAIN) {
3028 dev_err(DEV, "%s_sendmsg returned %d\n",
3029 sock == mdev->meta.socket ? "msock" : "sock",
3030 rv);
3031 drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
3032 } else
3033 drbd_force_state(mdev, NS(conn, C_TIMEOUT));
3034 }
3035
3036 return sent;
3037}
3038
3039static int drbd_open(struct block_device *bdev, fmode_t mode)
3040{
3041 struct drbd_conf *mdev = bdev->bd_disk->private_data;
3042 unsigned long flags;
3043 int rv = 0;
3044
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003045 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003046 spin_lock_irqsave(&mdev->req_lock, flags);
3047 /* to have a stable mdev->state.role
3048 * and no race with updating open_cnt */
3049
3050 if (mdev->state.role != R_PRIMARY) {
3051 if (mode & FMODE_WRITE)
3052 rv = -EROFS;
3053 else if (!allow_oos)
3054 rv = -EMEDIUMTYPE;
3055 }
3056
3057 if (!rv)
3058 mdev->open_cnt++;
3059 spin_unlock_irqrestore(&mdev->req_lock, flags);
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003060 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003061
3062 return rv;
3063}
3064
3065static int drbd_release(struct gendisk *gd, fmode_t mode)
3066{
3067 struct drbd_conf *mdev = gd->private_data;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003068 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003069 mdev->open_cnt--;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003070 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003071 return 0;
3072}
3073
Philipp Reisnerb411b362009-09-25 16:07:19 -07003074static void drbd_set_defaults(struct drbd_conf *mdev)
3075{
Philipp Reisner85f4cc12010-06-29 17:35:34 +02003076 /* This way we get a compile error when sync_conf grows,
3077 and we forgot to initialize it here */
3078 mdev->sync_conf = (struct syncer_conf) {
3079 /* .rate = */ DRBD_RATE_DEF,
3080 /* .after = */ DRBD_AFTER_DEF,
3081 /* .al_extents = */ DRBD_AL_EXTENTS_DEF,
Philipp Reisner85f4cc12010-06-29 17:35:34 +02003082 /* .verify_alg = */ {}, 0,
3083 /* .cpu_mask = */ {}, 0,
3084 /* .csums_alg = */ {}, 0,
Philipp Reisnere7564142010-06-29 17:35:34 +02003085 /* .use_rle = */ 0,
Philipp Reisner9a31d712010-07-05 13:42:03 +02003086 /* .on_no_data = */ DRBD_ON_NO_DATA_DEF,
3087 /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF,
3088 /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF,
3089 /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF,
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02003090 /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF,
3091 /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF
Philipp Reisner85f4cc12010-06-29 17:35:34 +02003092 };
3093
3094 /* Have to use that way, because the layout differs between
3095 big endian and little endian */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003096 mdev->state = (union drbd_state) {
3097 { .role = R_SECONDARY,
3098 .peer = R_UNKNOWN,
3099 .conn = C_STANDALONE,
3100 .disk = D_DISKLESS,
3101 .pdsk = D_UNKNOWN,
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003102 .susp = 0,
3103 .susp_nod = 0,
3104 .susp_fen = 0
Philipp Reisnerb411b362009-09-25 16:07:19 -07003105 } };
3106}
3107
3108void drbd_init_set_defaults(struct drbd_conf *mdev)
3109{
3110 /* the memset(,0,) did most of this.
3111 * note: only assignments, no allocation in here */
3112
3113 drbd_set_defaults(mdev);
3114
Philipp Reisnerb411b362009-09-25 16:07:19 -07003115 atomic_set(&mdev->ap_bio_cnt, 0);
3116 atomic_set(&mdev->ap_pending_cnt, 0);
3117 atomic_set(&mdev->rs_pending_cnt, 0);
3118 atomic_set(&mdev->unacked_cnt, 0);
3119 atomic_set(&mdev->local_cnt, 0);
3120 atomic_set(&mdev->net_cnt, 0);
3121 atomic_set(&mdev->packet_seq, 0);
3122 atomic_set(&mdev->pp_in_use, 0);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003123 atomic_set(&mdev->pp_in_use_by_net, 0);
Philipp Reisner778f2712010-07-06 11:14:00 +02003124 atomic_set(&mdev->rs_sect_in, 0);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02003125 atomic_set(&mdev->rs_sect_ev, 0);
Philipp Reisner759fbdf2010-10-26 16:02:27 +02003126 atomic_set(&mdev->ap_in_flight, 0);
Philipp Reisnere1711732011-06-27 11:51:46 +02003127 atomic_set(&mdev->md_io_in_use, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003128
Philipp Reisnerb411b362009-09-25 16:07:19 -07003129 mutex_init(&mdev->data.mutex);
3130 mutex_init(&mdev->meta.mutex);
3131 sema_init(&mdev->data.work.s, 0);
3132 sema_init(&mdev->meta.work.s, 0);
3133 mutex_init(&mdev->state_mutex);
3134
3135 spin_lock_init(&mdev->data.work.q_lock);
3136 spin_lock_init(&mdev->meta.work.q_lock);
3137
3138 spin_lock_init(&mdev->al_lock);
3139 spin_lock_init(&mdev->req_lock);
3140 spin_lock_init(&mdev->peer_seq_lock);
3141 spin_lock_init(&mdev->epoch_lock);
3142
3143 INIT_LIST_HEAD(&mdev->active_ee);
3144 INIT_LIST_HEAD(&mdev->sync_ee);
3145 INIT_LIST_HEAD(&mdev->done_ee);
3146 INIT_LIST_HEAD(&mdev->read_ee);
3147 INIT_LIST_HEAD(&mdev->net_ee);
3148 INIT_LIST_HEAD(&mdev->resync_reads);
3149 INIT_LIST_HEAD(&mdev->data.work.q);
3150 INIT_LIST_HEAD(&mdev->meta.work.q);
3151 INIT_LIST_HEAD(&mdev->resync_work.list);
3152 INIT_LIST_HEAD(&mdev->unplug_work.list);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003153 INIT_LIST_HEAD(&mdev->go_diskless.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003154 INIT_LIST_HEAD(&mdev->md_sync_work.list);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003155 INIT_LIST_HEAD(&mdev->start_resync_work.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003156 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
Philipp Reisner0ced55a2010-04-30 15:26:20 +02003157
Philipp Reisner794abb72010-12-27 11:51:23 +01003158 mdev->resync_work.cb = w_resync_timer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003159 mdev->unplug_work.cb = w_send_write_hint;
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003160 mdev->go_diskless.cb = w_go_diskless;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003161 mdev->md_sync_work.cb = w_md_sync;
3162 mdev->bm_io_work.w.cb = w_bitmap_io;
Philipp Reisner370a43e2011-01-14 16:03:11 +01003163 mdev->start_resync_work.cb = w_start_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003164 init_timer(&mdev->resync_timer);
3165 init_timer(&mdev->md_sync_timer);
Philipp Reisner370a43e2011-01-14 16:03:11 +01003166 init_timer(&mdev->start_resync_timer);
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003167 init_timer(&mdev->request_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003168 mdev->resync_timer.function = resync_timer_fn;
3169 mdev->resync_timer.data = (unsigned long) mdev;
3170 mdev->md_sync_timer.function = md_sync_timer_fn;
3171 mdev->md_sync_timer.data = (unsigned long) mdev;
Philipp Reisner370a43e2011-01-14 16:03:11 +01003172 mdev->start_resync_timer.function = start_resync_timer_fn;
3173 mdev->start_resync_timer.data = (unsigned long) mdev;
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003174 mdev->request_timer.function = request_timer_fn;
3175 mdev->request_timer.data = (unsigned long) mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176
3177 init_waitqueue_head(&mdev->misc_wait);
3178 init_waitqueue_head(&mdev->state_wait);
Philipp Reisner84dfb9f2010-06-23 11:20:05 +02003179 init_waitqueue_head(&mdev->net_cnt_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003180 init_waitqueue_head(&mdev->ee_wait);
3181 init_waitqueue_head(&mdev->al_wait);
3182 init_waitqueue_head(&mdev->seq_wait);
3183
3184 drbd_thread_init(mdev, &mdev->receiver, drbdd_init);
3185 drbd_thread_init(mdev, &mdev->worker, drbd_worker);
3186 drbd_thread_init(mdev, &mdev->asender, drbd_asender);
3187
3188 mdev->agreed_pro_version = PRO_VERSION_MAX;
Philipp Reisner2451fc32010-08-24 13:43:11 +02003189 mdev->write_ordering = WO_bdev_flush;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003190 mdev->resync_wenr = LC_FREE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02003191 mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
3192 mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003193}
3194
3195void drbd_mdev_cleanup(struct drbd_conf *mdev)
3196{
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02003197 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003198 if (mdev->receiver.t_state != None)
3199 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
3200 mdev->receiver.t_state);
3201
3202 /* no need to lock it, I'm the only thread alive */
3203 if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
3204 dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
3205 mdev->al_writ_cnt =
3206 mdev->bm_writ_cnt =
3207 mdev->read_cnt =
3208 mdev->recv_cnt =
3209 mdev->send_cnt =
3210 mdev->writ_cnt =
3211 mdev->p_size =
3212 mdev->rs_start =
3213 mdev->rs_total =
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02003214 mdev->rs_failed = 0;
3215 mdev->rs_last_events = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02003216 mdev->rs_last_sect_ev = 0;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02003217 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
3218 mdev->rs_mark_left[i] = 0;
3219 mdev->rs_mark_time[i] = 0;
3220 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003221 D_ASSERT(mdev->net_conf == NULL);
3222
3223 drbd_set_my_capacity(mdev, 0);
3224 if (mdev->bitmap) {
3225 /* maybe never allocated. */
Philipp Reisner02d9a942010-03-24 16:23:03 +01003226 drbd_bm_resize(mdev, 0, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003227 drbd_bm_cleanup(mdev);
3228 }
3229
3230 drbd_free_resources(mdev);
Philipp Reisner07782862010-08-31 12:00:50 +02003231 clear_bit(AL_SUSPENDED, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232
3233 /*
3234 * currently we drbd_init_ee only on module load, so
3235 * we may do drbd_release_ee only on module unload!
3236 */
3237 D_ASSERT(list_empty(&mdev->active_ee));
3238 D_ASSERT(list_empty(&mdev->sync_ee));
3239 D_ASSERT(list_empty(&mdev->done_ee));
3240 D_ASSERT(list_empty(&mdev->read_ee));
3241 D_ASSERT(list_empty(&mdev->net_ee));
3242 D_ASSERT(list_empty(&mdev->resync_reads));
3243 D_ASSERT(list_empty(&mdev->data.work.q));
3244 D_ASSERT(list_empty(&mdev->meta.work.q));
3245 D_ASSERT(list_empty(&mdev->resync_work.list));
3246 D_ASSERT(list_empty(&mdev->unplug_work.list));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003247 D_ASSERT(list_empty(&mdev->go_diskless.list));
Lars Ellenberg2265b472010-12-16 15:41:26 +01003248
3249 drbd_set_defaults(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003250}
3251
3252
3253static void drbd_destroy_mempools(void)
3254{
3255 struct page *page;
3256
3257 while (drbd_pp_pool) {
3258 page = drbd_pp_pool;
3259 drbd_pp_pool = (struct page *)page_private(page);
3260 __free_page(page);
3261 drbd_pp_vacant--;
3262 }
3263
3264 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
3265
Lars Ellenberg42818082011-02-23 12:39:46 +01003266 if (drbd_md_io_page_pool)
3267 mempool_destroy(drbd_md_io_page_pool);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003268 if (drbd_ee_mempool)
3269 mempool_destroy(drbd_ee_mempool);
3270 if (drbd_request_mempool)
3271 mempool_destroy(drbd_request_mempool);
3272 if (drbd_ee_cache)
3273 kmem_cache_destroy(drbd_ee_cache);
3274 if (drbd_request_cache)
3275 kmem_cache_destroy(drbd_request_cache);
3276 if (drbd_bm_ext_cache)
3277 kmem_cache_destroy(drbd_bm_ext_cache);
3278 if (drbd_al_ext_cache)
3279 kmem_cache_destroy(drbd_al_ext_cache);
3280
Lars Ellenberg42818082011-02-23 12:39:46 +01003281 drbd_md_io_page_pool = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003282 drbd_ee_mempool = NULL;
3283 drbd_request_mempool = NULL;
3284 drbd_ee_cache = NULL;
3285 drbd_request_cache = NULL;
3286 drbd_bm_ext_cache = NULL;
3287 drbd_al_ext_cache = NULL;
3288
3289 return;
3290}
3291
3292static int drbd_create_mempools(void)
3293{
3294 struct page *page;
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01003295 const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296 int i;
3297
3298 /* prepare our caches and mempools */
3299 drbd_request_mempool = NULL;
3300 drbd_ee_cache = NULL;
3301 drbd_request_cache = NULL;
3302 drbd_bm_ext_cache = NULL;
3303 drbd_al_ext_cache = NULL;
3304 drbd_pp_pool = NULL;
Lars Ellenberg42818082011-02-23 12:39:46 +01003305 drbd_md_io_page_pool = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003306
3307 /* caches */
3308 drbd_request_cache = kmem_cache_create(
3309 "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
3310 if (drbd_request_cache == NULL)
3311 goto Enomem;
3312
3313 drbd_ee_cache = kmem_cache_create(
3314 "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL);
3315 if (drbd_ee_cache == NULL)
3316 goto Enomem;
3317
3318 drbd_bm_ext_cache = kmem_cache_create(
3319 "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
3320 if (drbd_bm_ext_cache == NULL)
3321 goto Enomem;
3322
3323 drbd_al_ext_cache = kmem_cache_create(
3324 "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
3325 if (drbd_al_ext_cache == NULL)
3326 goto Enomem;
3327
3328 /* mempools */
Lars Ellenberg42818082011-02-23 12:39:46 +01003329 drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
3330 if (drbd_md_io_page_pool == NULL)
3331 goto Enomem;
3332
Philipp Reisnerb411b362009-09-25 16:07:19 -07003333 drbd_request_mempool = mempool_create(number,
3334 mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
3335 if (drbd_request_mempool == NULL)
3336 goto Enomem;
3337
3338 drbd_ee_mempool = mempool_create(number,
3339 mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
Nicolas Kaiser2027ae12010-10-28 06:15:26 -06003340 if (drbd_ee_mempool == NULL)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003341 goto Enomem;
3342
3343 /* drbd's page pool */
3344 spin_lock_init(&drbd_pp_lock);
3345
3346 for (i = 0; i < number; i++) {
3347 page = alloc_page(GFP_HIGHUSER);
3348 if (!page)
3349 goto Enomem;
3350 set_page_private(page, (unsigned long)drbd_pp_pool);
3351 drbd_pp_pool = page;
3352 }
3353 drbd_pp_vacant = number;
3354
3355 return 0;
3356
3357Enomem:
3358 drbd_destroy_mempools(); /* in case we allocated some */
3359 return -ENOMEM;
3360}
3361
3362static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
3363 void *unused)
3364{
3365 /* just so we have it. you never know what interesting things we
3366 * might want to do here some day...
3367 */
3368
3369 return NOTIFY_DONE;
3370}
3371
3372static struct notifier_block drbd_notifier = {
3373 .notifier_call = drbd_notify_sys,
3374};
3375
3376static void drbd_release_ee_lists(struct drbd_conf *mdev)
3377{
3378 int rr;
3379
3380 rr = drbd_release_ee(mdev, &mdev->active_ee);
3381 if (rr)
3382 dev_err(DEV, "%d EEs in active list found!\n", rr);
3383
3384 rr = drbd_release_ee(mdev, &mdev->sync_ee);
3385 if (rr)
3386 dev_err(DEV, "%d EEs in sync list found!\n", rr);
3387
3388 rr = drbd_release_ee(mdev, &mdev->read_ee);
3389 if (rr)
3390 dev_err(DEV, "%d EEs in read list found!\n", rr);
3391
3392 rr = drbd_release_ee(mdev, &mdev->done_ee);
3393 if (rr)
3394 dev_err(DEV, "%d EEs in done list found!\n", rr);
3395
3396 rr = drbd_release_ee(mdev, &mdev->net_ee);
3397 if (rr)
3398 dev_err(DEV, "%d EEs in net list found!\n", rr);
3399}
3400
3401/* caution. no locking.
3402 * currently only used from module cleanup code. */
3403static void drbd_delete_device(unsigned int minor)
3404{
3405 struct drbd_conf *mdev = minor_to_mdev(minor);
3406
3407 if (!mdev)
3408 return;
3409
Philipp Reisnerdfa8bed2011-06-29 14:06:08 +02003410 del_timer_sync(&mdev->request_timer);
3411
Philipp Reisnerb411b362009-09-25 16:07:19 -07003412 /* paranoia asserts */
3413 if (mdev->open_cnt != 0)
3414 dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
3415 __FILE__ , __LINE__);
3416
3417 ERR_IF (!list_empty(&mdev->data.work.q)) {
3418 struct list_head *lp;
3419 list_for_each(lp, &mdev->data.work.q) {
3420 dev_err(DEV, "lp = %p\n", lp);
3421 }
3422 };
3423 /* end paranoia asserts */
3424
3425 del_gendisk(mdev->vdisk);
3426
3427 /* cleanup stuff that may have been allocated during
3428 * device (re-)configuration or state changes */
3429
3430 if (mdev->this_bdev)
3431 bdput(mdev->this_bdev);
3432
3433 drbd_free_resources(mdev);
3434
3435 drbd_release_ee_lists(mdev);
3436
Bart Van Assche24c48302011-05-21 18:32:29 +02003437 /* should be freed on disconnect? */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003438 kfree(mdev->ee_hash);
3439 /*
3440 mdev->ee_hash_s = 0;
3441 mdev->ee_hash = NULL;
3442 */
3443
3444 lc_destroy(mdev->act_log);
3445 lc_destroy(mdev->resync);
3446
3447 kfree(mdev->p_uuid);
3448 /* mdev->p_uuid = NULL; */
3449
3450 kfree(mdev->int_dig_out);
3451 kfree(mdev->int_dig_in);
3452 kfree(mdev->int_dig_vv);
3453
3454 /* cleanup the rest that has been
3455 * allocated from drbd_new_device
3456 * and actually free the mdev itself */
3457 drbd_free_mdev(mdev);
3458}
3459
3460static void drbd_cleanup(void)
3461{
3462 unsigned int i;
3463
3464 unregister_reboot_notifier(&drbd_notifier);
3465
Lars Ellenberg17a93f32010-11-24 10:37:35 +01003466 /* first remove proc,
3467 * drbdsetup uses it's presence to detect
3468 * whether DRBD is loaded.
3469 * If we would get stuck in proc removal,
3470 * but have netlink already deregistered,
3471 * some drbdsetup commands may wait forever
3472 * for an answer.
3473 */
3474 if (drbd_proc)
3475 remove_proc_entry("drbd", NULL);
3476
Philipp Reisnerb411b362009-09-25 16:07:19 -07003477 drbd_nl_cleanup();
3478
3479 if (minor_table) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003480 i = minor_count;
3481 while (i--)
3482 drbd_delete_device(i);
3483 drbd_destroy_mempools();
3484 }
3485
3486 kfree(minor_table);
3487
3488 unregister_blkdev(DRBD_MAJOR, "drbd");
3489
3490 printk(KERN_INFO "drbd: module cleanup done.\n");
3491}
3492
3493/**
3494 * drbd_congested() - Callback for pdflush
3495 * @congested_data: User data
3496 * @bdi_bits: Bits pdflush is currently interested in
3497 *
3498 * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
3499 */
3500static int drbd_congested(void *congested_data, int bdi_bits)
3501{
3502 struct drbd_conf *mdev = congested_data;
3503 struct request_queue *q;
3504 char reason = '-';
3505 int r = 0;
3506
Andreas Gruenbacher1b881ef2010-12-13 18:03:38 +01003507 if (!may_inc_ap_bio(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003508 /* DRBD has frozen IO */
3509 r = bdi_bits;
3510 reason = 'd';
3511 goto out;
3512 }
3513
3514 if (get_ldev(mdev)) {
3515 q = bdev_get_queue(mdev->ldev->backing_bdev);
3516 r = bdi_congested(&q->backing_dev_info, bdi_bits);
3517 put_ldev(mdev);
3518 if (r)
3519 reason = 'b';
3520 }
3521
3522 if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) {
3523 r |= (1 << BDI_async_congested);
3524 reason = reason == 'b' ? 'a' : 'n';
3525 }
3526
3527out:
3528 mdev->congestion_reason = reason;
3529 return r;
3530}
3531
3532struct drbd_conf *drbd_new_device(unsigned int minor)
3533{
3534 struct drbd_conf *mdev;
3535 struct gendisk *disk;
3536 struct request_queue *q;
3537
3538 /* GFP_KERNEL, we are outside of all write-out paths */
3539 mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
3540 if (!mdev)
3541 return NULL;
3542 if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL))
3543 goto out_no_cpumask;
3544
3545 mdev->minor = minor;
3546
3547 drbd_init_set_defaults(mdev);
3548
3549 q = blk_alloc_queue(GFP_KERNEL);
3550 if (!q)
3551 goto out_no_q;
3552 mdev->rq_queue = q;
3553 q->queuedata = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003554
3555 disk = alloc_disk(1);
3556 if (!disk)
3557 goto out_no_disk;
3558 mdev->vdisk = disk;
3559
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003560 set_disk_ro(disk, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003561
3562 disk->queue = q;
3563 disk->major = DRBD_MAJOR;
3564 disk->first_minor = minor;
3565 disk->fops = &drbd_ops;
3566 sprintf(disk->disk_name, "drbd%d", minor);
3567 disk->private_data = mdev;
3568
3569 mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
3570 /* we have no partitions. we contain only ourselves. */
3571 mdev->this_bdev->bd_contains = mdev->this_bdev;
3572
3573 q->backing_dev_info.congested_fn = drbd_congested;
3574 q->backing_dev_info.congested_data = mdev;
3575
Andreas Gruenbacher2f58dcf2010-12-13 17:48:19 +01003576 blk_queue_make_request(q, drbd_make_request);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003577 /* Setting the max_hw_sectors to an odd value of 8kibyte here
3578 This triggers a max_bio_size message upon first attach or connect */
3579 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003580 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
3581 blk_queue_merge_bvec(q, drbd_merge_bvec);
Jens Axboe7eaceac2011-03-10 08:52:07 +01003582 q->queue_lock = &mdev->req_lock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003583
3584 mdev->md_io_page = alloc_page(GFP_KERNEL);
3585 if (!mdev->md_io_page)
3586 goto out_no_io_page;
3587
3588 if (drbd_bm_init(mdev))
3589 goto out_no_bitmap;
3590 /* no need to lock access, we are still initializing this minor device. */
3591 if (!tl_init(mdev))
3592 goto out_no_tl;
3593
3594 mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
3595 if (!mdev->app_reads_hash)
3596 goto out_no_app_reads;
3597
3598 mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
3599 if (!mdev->current_epoch)
3600 goto out_no_epoch;
3601
3602 INIT_LIST_HEAD(&mdev->current_epoch->list);
3603 mdev->epochs = 1;
3604
3605 return mdev;
3606
3607/* out_whatever_else:
3608 kfree(mdev->current_epoch); */
3609out_no_epoch:
3610 kfree(mdev->app_reads_hash);
3611out_no_app_reads:
3612 tl_cleanup(mdev);
3613out_no_tl:
3614 drbd_bm_cleanup(mdev);
3615out_no_bitmap:
3616 __free_page(mdev->md_io_page);
3617out_no_io_page:
3618 put_disk(disk);
3619out_no_disk:
3620 blk_cleanup_queue(q);
3621out_no_q:
3622 free_cpumask_var(mdev->cpu_mask);
3623out_no_cpumask:
3624 kfree(mdev);
3625 return NULL;
3626}
3627
3628/* counterpart of drbd_new_device.
3629 * last part of drbd_delete_device. */
3630void drbd_free_mdev(struct drbd_conf *mdev)
3631{
3632 kfree(mdev->current_epoch);
3633 kfree(mdev->app_reads_hash);
3634 tl_cleanup(mdev);
3635 if (mdev->bitmap) /* should no longer be there. */
3636 drbd_bm_cleanup(mdev);
3637 __free_page(mdev->md_io_page);
3638 put_disk(mdev->vdisk);
3639 blk_cleanup_queue(mdev->rq_queue);
3640 free_cpumask_var(mdev->cpu_mask);
Philipp Reisner37190942010-11-10 12:08:37 +01003641 drbd_free_tl_hash(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003642 kfree(mdev);
3643}
3644
3645
3646int __init drbd_init(void)
3647{
3648 int err;
3649
3650 if (sizeof(struct p_handshake) != 80) {
3651 printk(KERN_ERR
3652 "drbd: never change the size or layout "
3653 "of the HandShake packet.\n");
3654 return -EINVAL;
3655 }
3656
Philipp Reisner2b8a90b2011-01-10 11:15:17 +01003657 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003658 printk(KERN_ERR
3659 "drbd: invalid minor_count (%d)\n", minor_count);
3660#ifdef MODULE
3661 return -EINVAL;
3662#else
3663 minor_count = 8;
3664#endif
3665 }
3666
3667 err = drbd_nl_init();
3668 if (err)
3669 return err;
3670
3671 err = register_blkdev(DRBD_MAJOR, "drbd");
3672 if (err) {
3673 printk(KERN_ERR
3674 "drbd: unable to register block device major %d\n",
3675 DRBD_MAJOR);
3676 return err;
3677 }
3678
3679 register_reboot_notifier(&drbd_notifier);
3680
3681 /*
3682 * allocate all necessary structs
3683 */
3684 err = -ENOMEM;
3685
3686 init_waitqueue_head(&drbd_pp_wait);
3687
3688 drbd_proc = NULL; /* play safe for drbd_cleanup */
3689 minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count,
3690 GFP_KERNEL);
3691 if (!minor_table)
3692 goto Enomem;
3693
3694 err = drbd_create_mempools();
3695 if (err)
3696 goto Enomem;
3697
Lars Ellenberg8c484ee2010-03-11 16:47:58 +01003698 drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003699 if (!drbd_proc) {
3700 printk(KERN_ERR "drbd: unable to register proc file\n");
3701 goto Enomem;
3702 }
3703
3704 rwlock_init(&global_state_lock);
3705
3706 printk(KERN_INFO "drbd: initialized. "
3707 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
3708 API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
3709 printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
3710 printk(KERN_INFO "drbd: registered as block device major %d\n",
3711 DRBD_MAJOR);
3712 printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table);
3713
3714 return 0; /* Success! */
3715
3716Enomem:
3717 drbd_cleanup();
3718 if (err == -ENOMEM)
3719 /* currently always the case */
3720 printk(KERN_ERR "drbd: ran out of memory\n");
3721 else
3722 printk(KERN_ERR "drbd: initialization failure\n");
3723 return err;
3724}
3725
3726void drbd_free_bc(struct drbd_backing_dev *ldev)
3727{
3728 if (ldev == NULL)
3729 return;
3730
Tejun Heoe525fd82010-11-13 11:55:17 +01003731 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3732 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003733
3734 kfree(ldev);
3735}
3736
3737void drbd_free_sock(struct drbd_conf *mdev)
3738{
3739 if (mdev->data.socket) {
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003740 mutex_lock(&mdev->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003741 kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
3742 sock_release(mdev->data.socket);
3743 mdev->data.socket = NULL;
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003744 mutex_unlock(&mdev->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003745 }
3746 if (mdev->meta.socket) {
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003747 mutex_lock(&mdev->meta.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003748 kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
3749 sock_release(mdev->meta.socket);
3750 mdev->meta.socket = NULL;
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003751 mutex_unlock(&mdev->meta.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003752 }
3753}
3754
3755
3756void drbd_free_resources(struct drbd_conf *mdev)
3757{
3758 crypto_free_hash(mdev->csums_tfm);
3759 mdev->csums_tfm = NULL;
3760 crypto_free_hash(mdev->verify_tfm);
3761 mdev->verify_tfm = NULL;
3762 crypto_free_hash(mdev->cram_hmac_tfm);
3763 mdev->cram_hmac_tfm = NULL;
3764 crypto_free_hash(mdev->integrity_w_tfm);
3765 mdev->integrity_w_tfm = NULL;
3766 crypto_free_hash(mdev->integrity_r_tfm);
3767 mdev->integrity_r_tfm = NULL;
3768
3769 drbd_free_sock(mdev);
3770
3771 __no_warn(local,
3772 drbd_free_bc(mdev->ldev);
3773 mdev->ldev = NULL;);
3774}
3775
3776/* meta data management */
3777
3778struct meta_data_on_disk {
3779 u64 la_size; /* last agreed size. */
3780 u64 uuid[UI_SIZE]; /* UUIDs. */
3781 u64 device_uuid;
3782 u64 reserved_u64_1;
3783 u32 flags; /* MDF */
3784 u32 magic;
3785 u32 md_size_sect;
3786 u32 al_offset; /* offset to this block */
3787 u32 al_nr_extents; /* important for restoring the AL */
3788 /* `-- act_log->nr_elements <-- sync_conf.al_extents */
3789 u32 bm_offset; /* offset to the bitmap, from here */
3790 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
Philipp Reisner99432fc2011-05-20 16:39:13 +02003791 u32 la_peer_max_bio_size; /* last peer max_bio_size */
3792 u32 reserved_u32[3];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003793
3794} __packed;
3795
3796/**
3797 * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
3798 * @mdev: DRBD device.
3799 */
3800void drbd_md_sync(struct drbd_conf *mdev)
3801{
3802 struct meta_data_on_disk *buffer;
3803 sector_t sector;
3804 int i;
3805
Lars Ellenbergee15b032010-09-03 10:00:09 +02003806 del_timer(&mdev->md_sync_timer);
3807 /* timer may be rearmed by drbd_md_mark_dirty() now. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003808 if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
3809 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003810
3811 /* We use here D_FAILED and not D_ATTACHING because we try to write
3812 * metadata even if we detach due to a disk failure! */
3813 if (!get_ldev_if_state(mdev, D_FAILED))
3814 return;
3815
Philipp Reisnere1711732011-06-27 11:51:46 +02003816 buffer = drbd_md_get_buffer(mdev);
3817 if (!buffer)
3818 goto out;
3819
Philipp Reisnerb411b362009-09-25 16:07:19 -07003820 memset(buffer, 0, 512);
3821
3822 buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
3823 for (i = UI_CURRENT; i < UI_SIZE; i++)
3824 buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
3825 buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
3826 buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
3827
3828 buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
3829 buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
3830 buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
3831 buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
3832 buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
3833
3834 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003835 buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836
3837 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
3838 sector = mdev->ldev->md.md_offset;
3839
Lars Ellenberg3f3a9b82010-09-01 15:12:12 +02003840 if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003841 /* this was a try anyways ... */
3842 dev_err(DEV, "meta data update failed!\n");
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003843 drbd_chk_io_error(mdev, 1, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844 }
3845
3846 /* Update mdev->ldev->md.la_size_sect,
3847 * since we updated it on metadata. */
3848 mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
3849
Philipp Reisnere1711732011-06-27 11:51:46 +02003850 drbd_md_put_buffer(mdev);
3851out:
Philipp Reisnerb411b362009-09-25 16:07:19 -07003852 put_ldev(mdev);
3853}
3854
3855/**
3856 * drbd_md_read() - Reads in the meta data super block
3857 * @mdev: DRBD device.
3858 * @bdev: Device from which the meta data should be read in.
3859 *
Andreas Gruenbacher116676c2010-12-08 13:33:11 +01003860 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
Philipp Reisnerb411b362009-09-25 16:07:19 -07003861 * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
3862 */
3863int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3864{
3865 struct meta_data_on_disk *buffer;
3866 int i, rv = NO_ERROR;
3867
3868 if (!get_ldev_if_state(mdev, D_ATTACHING))
3869 return ERR_IO_MD_DISK;
3870
Philipp Reisnere1711732011-06-27 11:51:46 +02003871 buffer = drbd_md_get_buffer(mdev);
3872 if (!buffer)
3873 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003874
3875 if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
Lucas De Marchi25985ed2011-03-30 22:57:33 -03003876 /* NOTE: can't do normal error processing here as this is
Philipp Reisnerb411b362009-09-25 16:07:19 -07003877 called BEFORE disk is attached */
3878 dev_err(DEV, "Error while reading metadata.\n");
3879 rv = ERR_IO_MD_DISK;
3880 goto err;
3881 }
3882
3883 if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
3884 dev_err(DEV, "Error while reading metadata, magic not found.\n");
3885 rv = ERR_MD_INVALID;
3886 goto err;
3887 }
3888 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
3889 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
3890 be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
3891 rv = ERR_MD_INVALID;
3892 goto err;
3893 }
3894 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
3895 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
3896 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
3897 rv = ERR_MD_INVALID;
3898 goto err;
3899 }
3900 if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
3901 dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
3902 be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
3903 rv = ERR_MD_INVALID;
3904 goto err;
3905 }
3906
3907 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
3908 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
3909 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
3910 rv = ERR_MD_INVALID;
3911 goto err;
3912 }
3913
3914 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
3915 for (i = UI_CURRENT; i < UI_SIZE; i++)
3916 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
3917 bdev->md.flags = be32_to_cpu(buffer->flags);
3918 mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
3919 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
3920
Philipp Reisner99432fc2011-05-20 16:39:13 +02003921 spin_lock_irq(&mdev->req_lock);
3922 if (mdev->state.conn < C_CONNECTED) {
3923 int peer;
3924 peer = be32_to_cpu(buffer->la_peer_max_bio_size);
3925 peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
3926 mdev->peer_max_bio_size = peer;
3927 }
3928 spin_unlock_irq(&mdev->req_lock);
3929
Philipp Reisnerb411b362009-09-25 16:07:19 -07003930 if (mdev->sync_conf.al_extents < 7)
3931 mdev->sync_conf.al_extents = 127;
3932
3933 err:
Philipp Reisnere1711732011-06-27 11:51:46 +02003934 drbd_md_put_buffer(mdev);
3935 out:
Philipp Reisnerb411b362009-09-25 16:07:19 -07003936 put_ldev(mdev);
3937
3938 return rv;
3939}
3940
3941/**
3942 * drbd_md_mark_dirty() - Mark meta data super block as dirty
3943 * @mdev: DRBD device.
3944 *
3945 * Call this function if you change anything that should be written to
3946 * the meta-data super block. This function sets MD_DIRTY, and starts a
3947 * timer that ensures that within five seconds you have to call drbd_md_sync().
3948 */
Lars Ellenbergca0e6092010-10-14 15:01:21 +02003949#ifdef DEBUG
Lars Ellenbergee15b032010-09-03 10:00:09 +02003950void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
3951{
3952 if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
3953 mod_timer(&mdev->md_sync_timer, jiffies + HZ);
3954 mdev->last_md_mark_dirty.line = line;
3955 mdev->last_md_mark_dirty.func = func;
3956 }
3957}
3958#else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003959void drbd_md_mark_dirty(struct drbd_conf *mdev)
3960{
Lars Ellenbergee15b032010-09-03 10:00:09 +02003961 if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
Lars Ellenbergca0e6092010-10-14 15:01:21 +02003962 mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003963}
Lars Ellenbergee15b032010-09-03 10:00:09 +02003964#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07003965
3966static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
3967{
3968 int i;
3969
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003970 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003971 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003972}
3973
3974void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3975{
3976 if (idx == UI_CURRENT) {
3977 if (mdev->state.role == R_PRIMARY)
3978 val |= 1;
3979 else
3980 val &= ~((u64)1);
3981
3982 drbd_set_ed_uuid(mdev, val);
3983 }
3984
3985 mdev->ldev->md.uuid[idx] = val;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003986 drbd_md_mark_dirty(mdev);
3987}
3988
3989
3990void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3991{
3992 if (mdev->ldev->md.uuid[idx]) {
3993 drbd_uuid_move_history(mdev);
3994 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003995 }
3996 _drbd_uuid_set(mdev, idx, val);
3997}
3998
3999/**
4000 * drbd_uuid_new_current() - Creates a new current UUID
4001 * @mdev: DRBD device.
4002 *
4003 * Creates a new current UUID, and rotates the old current UUID into
4004 * the bitmap slot. Causes an incremental resync upon next connect.
4005 */
4006void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
4007{
4008 u64 val;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004009 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
Philipp Reisnerb411b362009-09-25 16:07:19 -07004010
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004011 if (bm_uuid)
4012 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
4013
Philipp Reisnerb411b362009-09-25 16:07:19 -07004014 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
Philipp Reisnerb411b362009-09-25 16:07:19 -07004015
4016 get_random_bytes(&val, sizeof(u64));
4017 _drbd_uuid_set(mdev, UI_CURRENT, val);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004018 drbd_print_uuids(mdev, "new current UUID");
Lars Ellenbergaaa8e2b2010-10-15 13:16:53 +02004019 /* get it to stable storage _now_ */
4020 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004021}
4022
4023void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
4024{
4025 if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
4026 return;
4027
4028 if (val == 0) {
4029 drbd_uuid_move_history(mdev);
4030 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
4031 mdev->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004032 } else {
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004033 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
4034 if (bm_uuid)
4035 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004036
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004037 mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004038 }
4039 drbd_md_mark_dirty(mdev);
4040}
4041
4042/**
4043 * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
4044 * @mdev: DRBD device.
4045 *
4046 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
4047 */
4048int drbd_bmio_set_n_write(struct drbd_conf *mdev)
4049{
4050 int rv = -EIO;
4051
4052 if (get_ldev_if_state(mdev, D_ATTACHING)) {
4053 drbd_md_set_flag(mdev, MDF_FULL_SYNC);
4054 drbd_md_sync(mdev);
4055 drbd_bm_set_all(mdev);
4056
4057 rv = drbd_bm_write(mdev);
4058
4059 if (!rv) {
4060 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
4061 drbd_md_sync(mdev);
4062 }
4063
4064 put_ldev(mdev);
4065 }
4066
4067 return rv;
4068}
4069
4070/**
4071 * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
4072 * @mdev: DRBD device.
4073 *
4074 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
4075 */
4076int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
4077{
4078 int rv = -EIO;
4079
Philipp Reisner07782862010-08-31 12:00:50 +02004080 drbd_resume_al(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004081 if (get_ldev_if_state(mdev, D_ATTACHING)) {
4082 drbd_bm_clear_all(mdev);
4083 rv = drbd_bm_write(mdev);
4084 put_ldev(mdev);
4085 }
4086
4087 return rv;
4088}
4089
4090static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
4091{
4092 struct bm_io_work *work = container_of(w, struct bm_io_work, w);
Lars Ellenberg02851e92010-12-16 14:47:39 +01004093 int rv = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004094
4095 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
4096
Lars Ellenberg02851e92010-12-16 14:47:39 +01004097 if (get_ldev(mdev)) {
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004098 drbd_bm_lock(mdev, work->why, work->flags);
Lars Ellenberg02851e92010-12-16 14:47:39 +01004099 rv = work->io_fn(mdev);
4100 drbd_bm_unlock(mdev);
4101 put_ldev(mdev);
4102 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103
4104 clear_bit(BITMAP_IO, &mdev->flags);
Philipp Reisner127b3172010-11-16 10:07:53 +01004105 smp_mb__after_clear_bit();
Philipp Reisnerb411b362009-09-25 16:07:19 -07004106 wake_up(&mdev->misc_wait);
4107
4108 if (work->done)
4109 work->done(mdev, rv);
4110
4111 clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
4112 work->why = NULL;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004113 work->flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004114
4115 return 1;
4116}
4117
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02004118void drbd_ldev_destroy(struct drbd_conf *mdev)
4119{
4120 lc_destroy(mdev->resync);
4121 mdev->resync = NULL;
4122 lc_destroy(mdev->act_log);
4123 mdev->act_log = NULL;
4124 __no_warn(local,
4125 drbd_free_bc(mdev->ldev);
4126 mdev->ldev = NULL;);
4127
4128 if (mdev->md_io_tmpp) {
4129 __free_page(mdev->md_io_tmpp);
4130 mdev->md_io_tmpp = NULL;
4131 }
4132 clear_bit(GO_DISKLESS, &mdev->flags);
4133}
4134
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004135static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
4136{
4137 D_ASSERT(mdev->state.disk == D_FAILED);
Lars Ellenberg9d282872010-10-14 13:57:07 +02004138 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
4139 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02004140 * the protected members anymore, though, so once put_ldev reaches zero
4141 * again, it will be safe to free them. */
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004142 drbd_force_state(mdev, NS(disk, D_DISKLESS));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004143 return 1;
4144}
4145
4146void drbd_go_diskless(struct drbd_conf *mdev)
4147{
4148 D_ASSERT(mdev->state.disk == D_FAILED);
4149 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
Lars Ellenberg9d282872010-10-14 13:57:07 +02004150 drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004151}
4152
Philipp Reisnerb411b362009-09-25 16:07:19 -07004153/**
4154 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
4155 * @mdev: DRBD device.
4156 * @io_fn: IO callback to be called when bitmap IO is possible
4157 * @done: callback to be called after the bitmap IO was performed
4158 * @why: Descriptive text of the reason for doing the IO
4159 *
4160 * While IO on the bitmap happens we freeze application IO thus we ensure
4161 * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
4162 * called from worker context. It MUST NOT be used while a previous such
4163 * work is still pending!
4164 */
4165void drbd_queue_bitmap_io(struct drbd_conf *mdev,
4166 int (*io_fn)(struct drbd_conf *),
4167 void (*done)(struct drbd_conf *, int),
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004168 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004169{
4170 D_ASSERT(current == mdev->worker.task);
4171
4172 D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
4173 D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
4174 D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
4175 if (mdev->bm_io_work.why)
4176 dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
4177 why, mdev->bm_io_work.why);
4178
4179 mdev->bm_io_work.io_fn = io_fn;
4180 mdev->bm_io_work.done = done;
4181 mdev->bm_io_work.why = why;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004182 mdev->bm_io_work.flags = flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004183
Philipp Reisner22afd7e2010-11-16 15:30:44 +01004184 spin_lock_irq(&mdev->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004185 set_bit(BITMAP_IO, &mdev->flags);
4186 if (atomic_read(&mdev->ap_bio_cnt) == 0) {
Philipp Reisner127b3172010-11-16 10:07:53 +01004187 if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004188 drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004189 }
Philipp Reisner22afd7e2010-11-16 15:30:44 +01004190 spin_unlock_irq(&mdev->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004191}
4192
4193/**
4194 * drbd_bitmap_io() - Does an IO operation on the whole bitmap
4195 * @mdev: DRBD device.
4196 * @io_fn: IO callback to be called when bitmap IO is possible
4197 * @why: Descriptive text of the reason for doing the IO
4198 *
4199 * freezes application IO while that the actual IO operations runs. This
4200 * functions MAY NOT be called from worker context.
4201 */
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004202int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
4203 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004204{
4205 int rv;
4206
4207 D_ASSERT(current != mdev->worker.task);
4208
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004209 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4210 drbd_suspend_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004211
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004212 drbd_bm_lock(mdev, why, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004213 rv = io_fn(mdev);
4214 drbd_bm_unlock(mdev);
4215
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004216 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4217 drbd_resume_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004218
4219 return rv;
4220}
4221
4222void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
4223{
4224 if ((mdev->ldev->md.flags & flag) != flag) {
4225 drbd_md_mark_dirty(mdev);
4226 mdev->ldev->md.flags |= flag;
4227 }
4228}
4229
4230void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
4231{
4232 if ((mdev->ldev->md.flags & flag) != 0) {
4233 drbd_md_mark_dirty(mdev);
4234 mdev->ldev->md.flags &= ~flag;
4235 }
4236}
4237int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
4238{
4239 return (bdev->md.flags & flag) != 0;
4240}
4241
4242static void md_sync_timer_fn(unsigned long data)
4243{
4244 struct drbd_conf *mdev = (struct drbd_conf *) data;
4245
4246 drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work);
4247}
4248
4249static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
4250{
4251 dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
Lars Ellenbergee15b032010-09-03 10:00:09 +02004252#ifdef DEBUG
4253 dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
4254 mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
4255#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07004256 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004257 return 1;
4258}
4259
4260#ifdef CONFIG_DRBD_FAULT_INJECTION
/* Fault insertion support, including a random number generator shamelessly
 * stolen from kernel/rcutorture.c */
struct fault_random_state {
	unsigned long state;	/* current generator value */
	unsigned long count;	/* calls left until the next refresh */
};

#define FAULT_RANDOM_MULT	39916801	/* prime */
#define FAULT_RANDOM_ADD	479001701	/* prime */
#define FAULT_RANDOM_REFRESH	10000
4271
4272/*
4273 * Crude but fast random-number generator. Uses a linear congruential
4274 * generator, with occasional help from get_random_bytes().
4275 */
4276static unsigned long
4277_drbd_fault_random(struct fault_random_state *rsp)
4278{
4279 long refresh;
4280
Roel Kluin49829ea2009-12-15 22:55:44 +01004281 if (!rsp->count--) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004282 get_random_bytes(&refresh, sizeof(refresh));
4283 rsp->state += refresh;
4284 rsp->count = FAULT_RANDOM_REFRESH;
4285 }
4286 rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
4287 return swahw32(rsp->state);
4288}
4289
4290static char *
4291_drbd_fault_str(unsigned int type) {
4292 static char *_faults[] = {
4293 [DRBD_FAULT_MD_WR] = "Meta-data write",
4294 [DRBD_FAULT_MD_RD] = "Meta-data read",
4295 [DRBD_FAULT_RS_WR] = "Resync write",
4296 [DRBD_FAULT_RS_RD] = "Resync read",
4297 [DRBD_FAULT_DT_WR] = "Data write",
4298 [DRBD_FAULT_DT_RD] = "Data read",
4299 [DRBD_FAULT_DT_RA] = "Data read ahead",
4300 [DRBD_FAULT_BM_ALLOC] = "BM allocation",
Philipp Reisner6b4388a2010-04-26 14:11:45 +02004301 [DRBD_FAULT_AL_EE] = "EE allocation",
4302 [DRBD_FAULT_RECEIVE] = "receive data corruption",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004303 };
4304
4305 return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
4306}
4307
4308unsigned int
4309_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
4310{
4311 static struct fault_random_state rrs = {0, 0};
4312
4313 unsigned int ret = (
4314 (fault_devs == 0 ||
4315 ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
4316 (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
4317
4318 if (ret) {
4319 fault_count++;
4320
Lars Ellenberg73835062010-05-27 11:51:56 +02004321 if (__ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322 dev_warn(DEV, "***Simulating %s failure\n",
4323 _drbd_fault_str(type));
4324 }
4325
4326 return ret;
4327}
4328#endif
4329
/**
 * drbd_buildtag() - Return a string identifying this build of DRBD
 *
 * DRBD built from external sources has here a reference to the git hash of
 * the source code.  In this variant the tag is generated lazily on the first
 * call: "srcversion: <srcversion>" when built with CONFIG_MODULES and
 * THIS_MODULE is not NULL, "built-in" otherwise.
 *
 * Returns a pointer to a static buffer; callers must not free or modify it.
 */
const char *drbd_buildtag(void)
{
	/* The leading NUL marks the tag as "not generated yet"; replacing it
	 * with 'b' turns the remainder into "built-in". */
	static char buildtag[38] = "\0uilt-in";

	if (buildtag[0] == 0) {
#ifdef CONFIG_MODULES
		if (THIS_MODULE != NULL)
			/* bounded write: never overrun the static buffer, even
			 * if srcversion is longer than the 24 characters the
			 * format pads to */
			snprintf(buildtag, sizeof(buildtag), "srcversion: %-24s",
				 THIS_MODULE->srcversion);
		else
#endif
			buildtag[0] = 'b';
	}

	return buildtag;
}
4348
4349module_init(drbd_init)
4350module_exit(drbd_cleanup)
4351
Philipp Reisnerb411b362009-09-25 16:07:19 -07004352EXPORT_SYMBOL(drbd_conn_str);
4353EXPORT_SYMBOL(drbd_role_str);
4354EXPORT_SYMBOL(drbd_disk_str);
4355EXPORT_SYMBOL(drbd_set_st_err_str);