/*
   drbd.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING. If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */

#include "drbd_vli.h"

static DEFINE_MUTEX(drbd_main_mutex);
int drbdd_init(struct drbd_thread *);
int drbd_worker(struct drbd_thread *);
int drbd_asender(struct drbd_thread *);

int drbd_init(void);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static int drbd_release(struct gendisk *gd, fmode_t mode);
static int w_md_sync(struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_work *w, int unused);

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* allow_open_on_secondary */
MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
 * this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(proc_details, int, 0644);
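
/* Illustrative note, not part of the original source: with the macros above,
 * a modular build would typically set these parameters as e.g.
 *   modprobe drbd minor_count=8 proc_details=1
 * while a kernel with DRBD built in would accept the boot parameter form
 * drbd.minor_count=8, as the comment above describes. */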

#ifdef CONFIG_DRBD_FAULT_INJECTION
int enable_faults;
int fault_rate;
static int fault_count;
int fault_devs;
/* bitmap of enabled faults */
module_param(enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param(fault_rate, int, 0664);
/* count of faults inserted */
module_param(fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param(fault_devs, int, 0644);
#endif
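
/* Hedged example, not in the original file: with CONFIG_DRBD_FAULT_INJECTION
 * enabled, faults might be armed at load time with something like
 *   modprobe drbd enable_faults=255 fault_rate=1
 * i.e. all fault types enabled (bitmask) at a 1% rate; fault_devs selects
 * the affected devices, per the parameter comments above. */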

/* module parameter, defined */
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
int disable_sendpage;
int allow_oos;
int proc_details;	/* Detail level in proc drbd */

/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char usermode_helper[80] = "/sbin/drbdadm";

module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
struct idr minors;
struct list_head drbd_tconns;	/* list of struct drbd_tconn */

struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache;	/* peer requests */
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;

/* I do not use a standard mempool, because:
   1) I want to hand out the pre-allocated objects first.
   2) I want to be able to interrupt sleeping allocation with a signal.
   Note: This is a single linked list, the next pointer is the private
	 member of struct page.
 */
struct page *drbd_pp_pool;
spinlock_t drbd_pp_lock;
int drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;

DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
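
/* Note added for clarity: the state above allows a burst of 5 rate-limited
 * messages per interval of 5*HZ jiffies, i.e. roughly five messages every
 * five seconds. */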

static const struct block_device_operations drbd_ops = {
	.owner =   THIS_MODULE,
	.open =    drbd_open,
	.release = drbd_release,
};

static void bio_destructor_drbd(struct bio *bio)
{
	bio_free(bio, drbd_md_io_bio_set);
}

struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
	struct bio *bio;

	if (!drbd_md_io_bio_set)
		return bio_alloc(gfp_mask, 1);

	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
	if (!bio)
		return NULL;
	bio->bi_destructor = bio_destructor_drbd;
	return bio;
}
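
/* Usage sketch, not in the original source: a caller needing a bio for
 * metadata I/O would do something like
 *   struct bio *bio = bio_alloc_drbd(GFP_NOIO);
 * and, as implemented above, transparently fall back to a plain bio_alloc()
 * while drbd_md_io_bio_set has not been set up yet. */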

#ifdef __CHECKER__
/* When checking with sparse, and this is an inline function, sparse will
   give tons of false positives. When this is a real function sparse works.
 */
int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
{
	int io_allowed;

	atomic_inc(&mdev->local_cnt);
	io_allowed = (mdev->state.disk >= mins);
	if (!io_allowed) {
		if (atomic_dec_and_test(&mdev->local_cnt))
			wake_up(&mdev->misc_wait);
	}
	return io_allowed;
}

#endif

/**
 * DOC: The transfer log
 *
 * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
 * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail
 * of the list. There is always at least one &struct drbd_tl_epoch object.
 *
 * Each &struct drbd_tl_epoch has a circular double linked list of requests
 * attached.
 */
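
/* Added sketch of the structure described above (not in the original file):
 *
 *   tconn->oldest_tle -> drbd_tl_epoch -> ... -> drbd_tl_epoch <- tconn->newest_tle
 *                            |                       |
 *                         requests                requests      (circular lists)
 *
 * The epochs are chained through their ->next pointers from oldest to newest.
 */
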
static int tl_init(struct drbd_tconn *tconn)
{
	struct drbd_tl_epoch *b;

	/* during device minor initialization, we may well use GFP_KERNEL */
	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
	if (!b)
		return 0;
	INIT_LIST_HEAD(&b->requests);
	INIT_LIST_HEAD(&b->w.list);
	b->next = NULL;
	b->br_number = 4711;
	b->n_writes = 0;
	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */

	tconn->oldest_tle = b;
	tconn->newest_tle = b;
	INIT_LIST_HEAD(&tconn->out_of_sequence_requests);

	return 1;
}

static void tl_cleanup(struct drbd_tconn *tconn)
{
	if (tconn->oldest_tle != tconn->newest_tle)
		conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n");
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n");
	kfree(tconn->oldest_tle);
	tconn->oldest_tle = NULL;
	kfree(tconn->unused_spare_tle);
	tconn->unused_spare_tle = NULL;
}

/**
 * _tl_add_barrier() - Adds a barrier to the transfer log
 * @mdev:	DRBD device.
 * @new:	Barrier to be added before the current head of the TL.
 *
 * The caller must hold the req_lock.
 */
void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new)
{
	struct drbd_tl_epoch *newest_before;

	INIT_LIST_HEAD(&new->requests);
	INIT_LIST_HEAD(&new->w.list);
	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
	new->next = NULL;
	new->n_writes = 0;

	newest_before = tconn->newest_tle;
	/* never send a barrier number == 0, because that is special-cased
	 * when using TCQ for our write ordering code */
	new->br_number = (newest_before->br_number+1) ?: 1;
	if (tconn->newest_tle != new) {
		tconn->newest_tle->next = new;
		tconn->newest_tle = new;
	}
}

/**
 * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
 * @mdev:	DRBD device.
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
 * &struct drbd_tl_epoch objects this function will cause a termination
 * of the connection.
 */
void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
		unsigned int set_size)
{
	struct drbd_conf *mdev;
	struct drbd_tl_epoch *b, *nob; /* next old barrier */
	struct list_head *le, *tle;
	struct drbd_request *r;

	spin_lock_irq(&tconn->req_lock);

	b = tconn->oldest_tle;

	/* first some paranoia code */
	if (b == NULL) {
		conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
			 barrier_nr);
		goto bail;
	}
	if (b->br_number != barrier_nr) {
		conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
			 barrier_nr, b->br_number);
		goto bail;
	}
	if (b->n_writes != set_size) {
		conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
			 barrier_nr, set_size, b->n_writes);
		goto bail;
	}

	/* Clean up list of requests processed during current epoch */
	list_for_each_safe(le, tle, &b->requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		_req_mod(r, BARRIER_ACKED);
	}
	/* There could be requests on the list waiting for completion
	   of the write to the local disk. To avoid corruptions of
	   slab's data structures we have to remove the lists head.

	   Also there could have been a barrier ack out of sequence, overtaking
	   the write acks - which would be a bug and violating write ordering.
	   To not deadlock in case we lose connection while such requests are
	   still pending, we need some way to find them for the
	   _req_mod(CONNECTION_LOST_WHILE_PENDING).

	   These have been list_move'd to the out_of_sequence_requests list in
	   _req_mod(, BARRIER_ACKED) above.
	 */
	list_del_init(&b->requests);
	mdev = b->w.mdev;

	nob = b->next;
	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		_tl_add_barrier(tconn, b);
		if (nob)
			tconn->oldest_tle = nob;
		/* if nob == NULL b was the only barrier, and becomes the new
		   barrier. Therefore tconn->oldest_tle points already to b */
	} else {
		D_ASSERT(nob != NULL);
		tconn->oldest_tle = nob;
		kfree(b);
	}

	spin_unlock_irq(&tconn->req_lock);
	dec_ap_pending(mdev);

	return;

bail:
	spin_unlock_irq(&tconn->req_lock);
	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}


/**
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @mdev:	DRBD device.
 * @what:	The action/event to perform with all request objects
 *
 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
 * RESTART_FROZEN_DISK_IO.
 */
void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	struct drbd_tl_epoch *b, *tmp, **pn;
	struct list_head *le, *tle, carry_reads;
	struct drbd_request *req;
	int rv, n_writes, n_reads;

	b = tconn->oldest_tle;
	pn = &tconn->oldest_tle;
	while (b) {
		n_writes = 0;
		n_reads = 0;
		INIT_LIST_HEAD(&carry_reads);
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			rv = _req_mod(req, what);

			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
		}
		tmp = b->next;

		if (n_writes) {
			if (what == RESEND) {
				b->n_writes = n_writes;
				if (b->w.cb == NULL) {
					b->w.cb = w_send_barrier;
					inc_ap_pending(b->w.mdev);
					set_bit(CREATE_BARRIER, &b->w.mdev->flags);
				}

				drbd_queue_work(&tconn->data.work, &b->w);
			}
			pn = &b->next;
		} else {
			if (n_reads)
				list_add(&carry_reads, &b->requests);
			/* there could still be requests on that ring list,
			 * in case local io is still pending */
			list_del(&b->requests);

			/* dec_ap_pending corresponding to queue_barrier.
			 * the newest barrier may not have been queued yet,
			 * in which case w.cb is still NULL. */
			if (b->w.cb != NULL)
				dec_ap_pending(b->w.mdev);

			if (b == tconn->newest_tle) {
				/* recycle, but reinit! */
				if (tmp != NULL)
					conn_err(tconn, "ASSERT FAILED tmp == NULL");
				INIT_LIST_HEAD(&b->requests);
				list_splice(&carry_reads, &b->requests);
				INIT_LIST_HEAD(&b->w.list);
				b->w.cb = NULL;
				b->br_number = net_random();
				b->n_writes = 0;

				*pn = b;
				break;
			}
			*pn = tmp;
			kfree(b);
		}
		b = tmp;
		list_splice(&carry_reads, &b->requests);
	}
}


/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
 * @mdev:	DRBD device.
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer log gets marked as out of sync. Called from the
 * receiver thread and the worker thread.
 */
void tl_clear(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	struct list_head *le, *tle;
	struct drbd_request *r;
	int vnr;

	spin_lock_irq(&tconn->req_lock);

	_tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);

	/* we expect this list to be empty. */
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n");

	/* but just in case, clean it up anyways! */
	list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		/* It would be nice to complete outside of spinlock.
		 * But this is easier for now. */
		_req_mod(r, CONNECTION_LOST_WHILE_PENDING);
	}

	/* ensure bit indicating barrier is required is clear */
	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		clear_bit(CREATE_BARRIER, &mdev->flags);
	rcu_read_unlock();

	spin_unlock_irq(&tconn->req_lock);
}

void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	spin_lock_irq(&tconn->req_lock);
	_tl_restart(tconn, what);
	spin_unlock_irq(&tconn->req_lock);
}

static int drbd_thread_setup(void *arg)
{
	struct drbd_thread *thi = (struct drbd_thread *) arg;
	struct drbd_tconn *tconn = thi->tconn;
	unsigned long flags;
	int retval;

	snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
		 thi->name[0], thi->tconn->name);

restart:
	retval = thi->function(thi);

	spin_lock_irqsave(&thi->t_lock, flags);

	/* if the receiver has been "EXITING", the last thing it did
	 * was set the conn state to "StandAlone",
	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
	 * and receiver thread will be "started".
	 * drbd_thread_start needs to set "RESTARTING" in that case.
	 * t_state check and assignment needs to be within the same spinlock,
	 * so either thread_start sees EXITING, and can remap to RESTARTING,
	 * or thread_start see NONE, and can proceed as normal.
	 */

	if (thi->t_state == RESTARTING) {
		conn_info(tconn, "Restarting %s thread\n", thi->name);
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		goto restart;
	}

	thi->task = NULL;
	thi->t_state = NONE;
	smp_mb();
	complete_all(&thi->stop);
	spin_unlock_irqrestore(&thi->t_lock, flags);

	conn_info(tconn, "Terminating %s\n", current->comm);

	/* Release mod reference taken when thread was started */

	kref_put(&tconn->kref, &conn_destroy);
	module_put(THIS_MODULE);
	return retval;
}

static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi,
			     int (*func) (struct drbd_thread *), char *name)
{
	spin_lock_init(&thi->t_lock);
	thi->task = NULL;
	thi->t_state = NONE;
	thi->function = func;
	thi->tconn = tconn;
	strncpy(thi->name, name, ARRAY_SIZE(thi->name));
}

int drbd_thread_start(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	struct task_struct *nt;
	unsigned long flags;

	/* is used from state engine doing drbd_thread_stop_nowait,
	 * while holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	switch (thi->t_state) {
	case NONE:
		conn_info(tconn, "Starting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);

		/* Get ref on module for thread - this is released when thread exits */
		if (!try_module_get(THIS_MODULE)) {
			conn_err(tconn, "Failed to get module reference in drbd_thread_start\n");
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return false;
		}

		kref_get(&thi->tconn->kref);

		init_completion(&thi->stop);
		thi->reset_cpu_mask = 1;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */

		nt = kthread_create(drbd_thread_setup, (void *) thi,
				    "drbd_%c_%s", thi->name[0], thi->tconn->name);

		if (IS_ERR(nt)) {
			conn_err(tconn, "Couldn't start thread\n");

			kref_put(&tconn->kref, &conn_destroy);
			module_put(THIS_MODULE);
			return false;
		}
		spin_lock_irqsave(&thi->t_lock, flags);
		thi->task = nt;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		wake_up_process(nt);
		break;
	case EXITING:
		thi->t_state = RESTARTING;
		conn_info(tconn, "Restarting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);
		/* fall through */
	case RUNNING:
	case RESTARTING:
	default:
		spin_unlock_irqrestore(&thi->t_lock, flags);
		break;
	}

	return true;
}


void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
{
	unsigned long flags;

	enum drbd_thread_state ns = restart ? RESTARTING : EXITING;

	/* may be called from state engine, holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	if (thi->t_state == NONE) {
		spin_unlock_irqrestore(&thi->t_lock, flags);
		if (restart)
			drbd_thread_start(thi);
		return;
	}

	if (thi->t_state != ns) {
		if (thi->task == NULL) {
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return;
		}

		thi->t_state = ns;
		smp_mb();
		init_completion(&thi->stop);
		if (thi->task != current)
			force_sig(DRBD_SIGKILL, thi->task);
	}

	spin_unlock_irqrestore(&thi->t_lock, flags);

	if (wait)
		wait_for_completion(&thi->stop);
}

static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi =
		task == tconn->receiver.task ? &tconn->receiver :
		task == tconn->asender.task  ? &tconn->asender :
		task == tconn->worker.task   ? &tconn->worker : NULL;

	return thi;
}

char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi = drbd_task_to_thread(tconn, task);
	return thi ? thi->name : task->comm;
}

int conn_lowest_minor(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	int vnr = 0, m;

	rcu_read_lock();
	mdev = idr_get_next(&tconn->volumes, &vnr);
	m = mdev ? mdev_to_minor(mdev) : -1;
	rcu_read_unlock();

	return m;
}

#ifdef CONFIG_SMP
/**
 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
 * @mdev:	DRBD device.
 *
 * Forces all threads of a device onto the same CPU. This is beneficial for
 * DRBD's performance. May be overwritten by user's configuration.
 */
void drbd_calc_cpu_mask(struct drbd_tconn *tconn)
{
	int ord, cpu;

	/* user override. */
	if (cpumask_weight(tconn->cpu_mask))
		return;

	ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask);
	for_each_online_cpu(cpu) {
		if (ord-- == 0) {
			cpumask_set_cpu(cpu, tconn->cpu_mask);
			return;
		}
	}
	/* should not be reached */
	cpumask_setall(tconn->cpu_mask);
}

/**
 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
 * @mdev:	DRBD device.
 * @thi:	drbd_thread object
 *
 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
 * prematurely.
 */
void drbd_thread_current_set_cpu(struct drbd_thread *thi)
{
	struct task_struct *p = current;

	if (!thi->reset_cpu_mask)
		return;
	thi->reset_cpu_mask = 0;
	set_cpus_allowed_ptr(p, thi->tconn->cpu_mask);
}
#endif

/**
 * drbd_header_size - size of a packet header
 *
 * The header size is a multiple of 8, so any payload following the header is
 * word aligned on 64-bit architectures. (The bitmap send and receive code
 * relies on this.)
 */
unsigned int drbd_header_size(struct drbd_tconn *tconn)
{
	if (tconn->agreed_pro_version >= 100) {
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8));
		return sizeof(struct p_header100);
	} else {
		BUILD_BUG_ON(sizeof(struct p_header80) !=
			     sizeof(struct p_header95));
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
		return sizeof(struct p_header80);
	}
}

static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be32(DRBD_MAGIC);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be16(size);
	return sizeof(struct p_header80);
}

static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be32(size);
	return sizeof(struct p_header95);
}

static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd,
				      int size, int vnr)
{
	h->magic   = cpu_to_be32(DRBD_MAGIC_100);
	h->volume  = cpu_to_be16(vnr);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be32(size);
	h->pad     = 0;
	return sizeof(struct p_header100);
}

static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr,
				   void *buffer, enum drbd_packet cmd, int size)
{
	if (tconn->agreed_pro_version >= 100)
		return prepare_header100(buffer, cmd, size, vnr);
	else if (tconn->agreed_pro_version >= 95 &&
		 size > DRBD_MAX_SIZE_H80_PACKET)
		return prepare_header95(buffer, cmd, size);
	else
		return prepare_header80(buffer, cmd, size);
}
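
/* Summary comment added for clarity (not in the original source): the helper
 * above picks the on-wire header format from the agreed protocol version:
 *   >= 100                                      -> struct p_header100 (carries volume number)
 *   >= 95 and size > DRBD_MAX_SIZE_H80_PACKET   -> struct p_header95  (32-bit length)
 *   otherwise                                   -> struct p_header80  (16-bit length)
 */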

static void *__conn_prepare_command(struct drbd_tconn *tconn,
				    struct drbd_socket *sock)
{
	if (!sock->socket)
		return NULL;
	return sock->sbuf + drbd_header_size(tconn);
}

void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock)
{
	void *p;

	mutex_lock(&sock->mutex);
	p = __conn_prepare_command(tconn, sock);
	if (!p)
		mutex_unlock(&sock->mutex);

	return p;
}

void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock)
{
	return conn_prepare_command(mdev->tconn, sock);
}

static int __send_command(struct drbd_tconn *tconn, int vnr,
			  struct drbd_socket *sock, enum drbd_packet cmd,
			  unsigned int header_size, void *data,
			  unsigned int size)
{
	int msg_flags;
	int err;

	/*
	 * Called with @data == NULL and the size of the data blocks in @size
	 * for commands that send data blocks. For those commands, omit the
	 * MSG_MORE flag: this will increase the likelihood that data blocks
	 * which are page aligned on the sender will end up page aligned on the
	 * receiver.
	 */
	msg_flags = data ? MSG_MORE : 0;

	header_size += prepare_header(tconn, vnr, sock->sbuf, cmd,
				      header_size + size);
	err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size,
			    msg_flags);
	if (data && !err)
		err = drbd_send_all(tconn, sock->socket, data, size, 0);
	return err;
}

static int __conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
			       enum drbd_packet cmd, unsigned int header_size,
			       void *data, unsigned int size)
{
	return __send_command(tconn, 0, sock, cmd, header_size, data, size);
}

int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __conn_send_command(tconn, sock, cmd, header_size, data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

int drbd_send_command(struct drbd_conf *mdev, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, header_size,
			     data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

int drbd_send_ping(struct drbd_tconn *tconn)
{
	struct drbd_socket *sock;

	sock = &tconn->meta;
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, P_PING, 0, NULL, 0);
}

int drbd_send_ping_ack(struct drbd_tconn *tconn)
{
	struct drbd_socket *sock;

	sock = &tconn->meta;
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, P_PING_ACK, 0, NULL, 0);
}

int drbd_send_sync_param(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_rs_param_95 *p;
	int size;
	const int apv = mdev->tconn->agreed_pro_version;
	enum drbd_packet cmd;
	struct net_conf *nc;
	struct disk_conf *dc;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;

	rcu_read_lock();
	nc = rcu_dereference(mdev->tconn->net_conf);

	size = apv <= 87 ? sizeof(struct p_rs_param)
		: apv == 88 ? sizeof(struct p_rs_param)
			+ strlen(nc->verify_alg) + 1
		: apv <= 94 ? sizeof(struct p_rs_param_89)
		: /* apv >= 95 */ sizeof(struct p_rs_param_95);

	cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (get_ldev(mdev)) {
		dc = rcu_dereference(mdev->ldev->disk_conf);
		p->resync_rate = cpu_to_be32(dc->resync_rate);
		p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
		p->c_delay_target = cpu_to_be32(dc->c_delay_target);
		p->c_fill_target = cpu_to_be32(dc->c_fill_target);
		p->c_max_rate = cpu_to_be32(dc->c_max_rate);
		put_ldev(mdev);
	} else {
		p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF);
		p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
		p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
		p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
		p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
	}

	if (apv >= 88)
		strcpy(p->verify_alg, nc->verify_alg);
	if (apv >= 89)
		strcpy(p->csums_alg, nc->csums_alg);
	rcu_read_unlock();

	return drbd_send_command(mdev, sock, cmd, size, NULL, 0);
}

int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd)
{
	struct drbd_socket *sock;
	struct p_protocol *p;
	struct net_conf *nc;
	int size, cf;

	sock = &tconn->data;
	p = __conn_prepare_command(tconn, sock);
	if (!p)
		return -EIO;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);

	if (nc->tentative && tconn->agreed_pro_version < 92) {
		rcu_read_unlock();
		mutex_unlock(&sock->mutex);
		conn_err(tconn, "--dry-run is not supported by peer");
		return -EOPNOTSUPP;
	}

	size = sizeof(*p);
	if (tconn->agreed_pro_version >= 87)
		size += strlen(nc->integrity_alg) + 1;

	p->protocol      = cpu_to_be32(nc->wire_protocol);
	p->after_sb_0p   = cpu_to_be32(nc->after_sb_0p);
	p->after_sb_1p   = cpu_to_be32(nc->after_sb_1p);
	p->after_sb_2p   = cpu_to_be32(nc->after_sb_2p);
	p->two_primaries = cpu_to_be32(nc->two_primaries);
	cf = 0;
	if (nc->discard_my_data)
		cf |= CF_DISCARD_MY_DATA;
	if (nc->tentative)
		cf |= CF_DRY_RUN;
	p->conn_flags    = cpu_to_be32(cf);

	if (tconn->agreed_pro_version >= 87)
		strcpy(p->integrity_alg, nc->integrity_alg);
	rcu_read_unlock();

	return __conn_send_command(tconn, sock, cmd, size, NULL, 0);
}

int drbd_send_protocol(struct drbd_tconn *tconn)
{
	int err;

	mutex_lock(&tconn->data.mutex);
	err = __drbd_send_protocol(tconn, P_PROTOCOL);
	mutex_unlock(&tconn->data.mutex);

	return err;
}

int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
{
	struct drbd_socket *sock;
	struct p_uuids *p;
	int i;

	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
		return 0;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p) {
		put_ldev(mdev);
		return -EIO;
	}
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		p->uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;

	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
	p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
	rcu_read_lock();
	uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0;
	rcu_read_unlock();
	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
	p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);

	put_ldev(mdev);
	return drbd_send_command(mdev, sock, P_UUIDS, sizeof(*p), NULL, 0);
}

int drbd_send_uuids(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 0);
}

int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 8);
}

void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
{
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		u64 *uuid = mdev->ldev->md.uuid;
		dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
			 text,
			 (unsigned long long)uuid[UI_CURRENT],
			 (unsigned long long)uuid[UI_BITMAP],
			 (unsigned long long)uuid[UI_HISTORY_START],
			 (unsigned long long)uuid[UI_HISTORY_END]);
		put_ldev(mdev);
	} else {
		dev_info(DEV, "%s effective data uuid: %016llX\n",
			 text,
			 (unsigned long long)mdev->ed_uuid);
	}
}

void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_rs_uuid *p;
	u64 uuid;

	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);

	uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
	drbd_uuid_set(mdev, UI_BITMAP, uuid);
	drbd_print_uuids(mdev, "updated sync UUID");
	drbd_md_sync(mdev);

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (p) {
		p->uuid = cpu_to_be64(uuid);
		drbd_send_command(mdev, sock, P_SYNC_UUID, sizeof(*p), NULL, 0);
	}
}

int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
{
	struct drbd_socket *sock;
	struct p_sizes *p;
	sector_t d_size, u_size;
	int q_order_type, max_bio_size;

	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		D_ASSERT(mdev->ldev->backing_bdev);
		d_size = drbd_get_max_capacity(mdev->ldev);
		rcu_read_lock();
		u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
		rcu_read_unlock();
		q_order_type = drbd_queue_order_type(mdev);
		max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
		put_ldev(mdev);
	} else {
		d_size = 0;
		u_size = 0;
		q_order_type = QUEUE_ORDERED_NONE;
		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
	}

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;

	if (mdev->tconn->agreed_pro_version <= 94)
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
	else if (mdev->tconn->agreed_pro_version < 100)
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE_P95);

	p->d_size = cpu_to_be64(d_size);
	p->u_size = cpu_to_be64(u_size);
	p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
	p->max_bio_size = cpu_to_be32(max_bio_size);
	p->queue_order_type = cpu_to_be16(q_order_type);
	p->dds_flags = cpu_to_be16(flags);
	return drbd_send_command(mdev, sock, P_SIZES, sizeof(*p), NULL, 0);
}

/**
 * drbd_send_state() - Sends the drbd state to the peer
 * @mdev:	DRBD device.
 */
int drbd_send_state(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_state *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
	return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0);
}

int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val)
{
	struct drbd_socket *sock;
	struct p_req_state *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->mask = cpu_to_be32(mask.i);
	p->val = cpu_to_be32(val.i);
	return drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0);
}

int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
{
	enum drbd_packet cmd;
	struct drbd_socket *sock;
	struct p_req_state *p;

	cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ;
	sock = &tconn->data;
	p = conn_prepare_command(tconn, sock);
	if (!p)
		return -EIO;
	p->mask = cpu_to_be32(mask.i);
	p->val = cpu_to_be32(val.i);
	return conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0);
}

void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
{
	struct drbd_socket *sock;
	struct p_req_state_reply *p;

	sock = &mdev->tconn->meta;
	p = drbd_prepare_command(mdev, sock);
	if (p) {
		p->retcode = cpu_to_be32(retcode);
		drbd_send_command(mdev, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0);
	}
}

void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode)
{
	struct drbd_socket *sock;
	struct p_req_state_reply *p;
	enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY;

	sock = &tconn->meta;
	p = conn_prepare_command(tconn, sock);
	if (p) {
		p->retcode = cpu_to_be32(retcode);
		conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0);
	}
}

static void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
{
	BUG_ON(code & ~0xf);
	p->encoding = (p->encoding & ~0xf) | code;
}

static void dcbp_set_start(struct p_compressed_bm *p, int set)
{
	p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
}

static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n)
{
	BUG_ON(n & ~0x7);
	p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
}
1184
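/* Encode a chunk of the bitmap into p->code using run-length encoding plus
 * variable length integers (VLI).
 * Layout of p->encoding as implied by the dcbp_* helpers above (summary added
 * here for readability): bit 7 = value of the first run, bits 6..4 = number
 * of trailing pad bits, bits 3..0 = encoding code.
 * Returns the number of code bytes produced; 0 if RLE may not be used, there
 * is nothing to do, encoding failed, or compression did not pay off; -1 if a
 * zero run length is detected. */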
Philipp Reisnerb411b362009-09-25 16:07:19 -07001185int fill_bitmap_rle_bits(struct drbd_conf *mdev,
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001186 struct p_compressed_bm *p,
1187 unsigned int size,
1188 struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001189{
1190 struct bitstream bs;
1191 unsigned long plain_bits;
1192 unsigned long tmp;
1193 unsigned long rl;
1194 unsigned len;
1195 unsigned toggle;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001196 int bits, use_rle;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197
1198 /* may we use this feature? */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001199 rcu_read_lock();
1200 use_rle = rcu_dereference(mdev->tconn->net_conf)->use_rle;
1201 rcu_read_unlock();
1202 if (!use_rle || mdev->tconn->agreed_pro_version < 90)
1203 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001204
1205 if (c->bit_offset >= c->bm_bits)
1206 return 0; /* nothing to do. */
1207
1208	/* use at most this many bytes */
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001209 bitstream_init(&bs, p->code, size, 0);
1210 memset(p->code, 0, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001211 /* plain bits covered in this code string */
1212 plain_bits = 0;
1213
1214 /* p->encoding & 0x80 stores whether the first run length is set.
1215 * bit offset is implicit.
1216 * start with toggle == 2 to be able to tell the first iteration */
1217 toggle = 2;
1218
1219	/* see how many plain bits we can stuff into one packet
1220 * using RLE and VLI. */
1221 do {
1222 tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
1223 : _drbd_bm_find_next(mdev, c->bit_offset);
1224 if (tmp == -1UL)
1225 tmp = c->bm_bits;
1226 rl = tmp - c->bit_offset;
1227
1228 if (toggle == 2) { /* first iteration */
1229 if (rl == 0) {
1230 /* the first checked bit was set,
1231 * store start value, */
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01001232 dcbp_set_start(p, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001233 /* but skip encoding of zero run length */
1234 toggle = !toggle;
1235 continue;
1236 }
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01001237 dcbp_set_start(p, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238 }
1239
1240 /* paranoia: catch zero runlength.
1241 * can only happen if bitmap is modified while we scan it. */
1242 if (rl == 0) {
1243 dev_err(DEV, "unexpected zero runlength while encoding bitmap "
1244 "t:%u bo:%lu\n", toggle, c->bit_offset);
1245 return -1;
1246 }
1247
1248 bits = vli_encode_bits(&bs, rl);
1249 if (bits == -ENOBUFS) /* buffer full */
1250 break;
1251 if (bits <= 0) {
1252 dev_err(DEV, "error while encoding bitmap: %d\n", bits);
1253 return 0;
1254 }
1255
1256 toggle = !toggle;
1257 plain_bits += rl;
1258 c->bit_offset = tmp;
1259 } while (c->bit_offset < c->bm_bits);
1260
1261 len = bs.cur.b - p->code + !!bs.cur.bit;
1262
1263 if (plain_bits < (len << 3)) {
1264 /* incompressible with this method.
1265 * we need to rewind both word and bit position. */
1266 c->bit_offset -= plain_bits;
1267 bm_xfer_ctx_bit_to_word_offset(c);
1268 c->bit_offset = c->word_offset * BITS_PER_LONG;
1269 return 0;
1270 }
1271
1272 /* RLE + VLI was able to compress it just fine.
1273 * update c->word_offset. */
1274 bm_xfer_ctx_bit_to_word_offset(c);
1275
1276 /* store pad_bits */
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01001277 dcbp_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278
1279 return len;
1280}
1281
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001282/**
1283 * send_bitmap_rle_or_plain
1284 *
1285 * Return 0 when done, 1 when another iteration is needed, and a negative error
1286 * code upon failure.
1287 */
1288static int
Andreas Gruenbacher79ed9bd2011-03-24 21:31:38 +01001289send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001290{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001291 struct drbd_socket *sock = &mdev->tconn->data;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001292 unsigned int header_size = drbd_header_size(mdev->tconn);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001293 struct p_compressed_bm *p = sock->sbuf + header_size;
Andreas Gruenbachera982dd52010-12-10 00:45:25 +01001294 int len, err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001296 len = fill_bitmap_rle_bits(mdev, p,
1297 DRBD_SOCKET_BUFFER_SIZE - header_size - sizeof(*p), c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001298 if (len < 0)
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001299 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001300
1301 if (len) {
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01001302 dcbp_set_code(p, RLE_VLI_Bits);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001303 err = __send_command(mdev->tconn, mdev->vnr, sock,
1304 P_COMPRESSED_BITMAP, sizeof(*p) + len,
1305 NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001306 c->packets[0]++;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001307 c->bytes[0] += header_size + sizeof(*p) + len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001308
1309 if (c->bit_offset >= c->bm_bits)
1310 len = 0; /* DONE */
1311 } else {
1312 /* was not compressible.
1313 * send a buffer full of plain text bits instead. */
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001314 unsigned int data_size;
1315 unsigned long num_words;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001316 unsigned long *p = sock->sbuf + header_size;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001317
1318 data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001319 num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001320 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001321 len = num_words * sizeof(*p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001322 if (len)
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001323 drbd_bm_get_lel(mdev, c->word_offset, num_words, p);
1324 err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP, len, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001325 c->word_offset += num_words;
1326 c->bit_offset = c->word_offset * BITS_PER_LONG;
1327
1328 c->packets[1]++;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001329 c->bytes[1] += header_size + len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001330
1331 if (c->bit_offset > c->bm_bits)
1332 c->bit_offset = c->bm_bits;
1333 }
Andreas Gruenbachera982dd52010-12-10 00:45:25 +01001334 if (!err) {
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001335 if (len == 0) {
1336 INFO_bm_xfer_stats(mdev, "send", c);
1337 return 0;
1338 } else
1339 return 1;
1340 }
1341 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001342}
1343
1344/* See the comment at receive_bitmap() */
Andreas Gruenbacher058820c2011-03-22 16:03:43 +01001345static int _drbd_send_bitmap(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001346{
1347 struct bm_xfer_ctx c;
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001348 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001349
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001350 if (!expect(mdev->bitmap))
1351 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001352
Philipp Reisnerb411b362009-09-25 16:07:19 -07001353 if (get_ldev(mdev)) {
1354 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1355 dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
1356 drbd_bm_set_all(mdev);
1357 if (drbd_bm_write(mdev)) {
1358 /* write_bm did fail! Leave full sync flag set in Meta P_DATA
1359 * but otherwise process as per normal - need to tell other
1360 * side that a full resync is required! */
1361 dev_err(DEV, "Failed to write bitmap to disk!\n");
1362 } else {
1363 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
1364 drbd_md_sync(mdev);
1365 }
1366 }
1367 put_ldev(mdev);
1368 }
1369
1370 c = (struct bm_xfer_ctx) {
1371 .bm_bits = drbd_bm_bits(mdev),
1372 .bm_words = drbd_bm_words(mdev),
1373 };
1374
1375 do {
Andreas Gruenbacher79ed9bd2011-03-24 21:31:38 +01001376 err = send_bitmap_rle_or_plain(mdev, &c);
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001377 } while (err > 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001378
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001379 return err == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001380}
1381
1382int drbd_send_bitmap(struct drbd_conf *mdev)
1383{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001384 struct drbd_socket *sock = &mdev->tconn->data;
1385 int err = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001386
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001387 mutex_lock(&sock->mutex);
1388 if (sock->socket)
1389 err = !_drbd_send_bitmap(mdev);
1390 mutex_unlock(&sock->mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001391 return err;
1392}
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001393
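/* Acknowledge a write barrier on the meta data socket; silently skipped
 * while the connection is below C_CONNECTED. */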
Andreas Gruenbacherd4e67d72011-03-16 01:25:28 +01001394void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001395{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001396 struct drbd_socket *sock;
1397 struct p_barrier_ack *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001398
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001399 if (mdev->state.conn < C_CONNECTED)
1400 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001401
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001402 sock = &mdev->tconn->meta;
1403 p = drbd_prepare_command(mdev, sock);
1404 if (!p)
1405 return;
1406 p->barrier = barrier_nr;
1407 p->set_size = cpu_to_be32(set_size);
1408 drbd_send_command(mdev, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001409}
1410
1411/**
1412 * _drbd_send_ack() - Sends an ack packet
1413 * @mdev: DRBD device.
1414 * @cmd: Packet command code.
1415 * @sector: sector, needs to be in big endian byte order
1416 * @blksize:	size in bytes, needs to be in big endian byte order
1417 * @block_id: Id, big endian byte order
1418 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001419static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
1420 u64 sector, u32 blksize, u64 block_id)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001421{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001422 struct drbd_socket *sock;
1423 struct p_block_ack *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001424
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001425 if (mdev->state.conn < C_CONNECTED)
Andreas Gruenbachera8c32aa2011-03-16 01:27:22 +01001426 return -EIO;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001427
1428 sock = &mdev->tconn->meta;
1429 p = drbd_prepare_command(mdev, sock);
1430 if (!p)
1431 return -EIO;
1432 p->sector = sector;
1433 p->block_id = block_id;
1434 p->blksize = blksize;
1435 p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq));
1436 return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437}
1438
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001439/* dp->sector and dp->block_id already/still in network byte order,
1440 * data_size is payload size according to dp->head,
1441 * and may need to be corrected for digest size. */
Andreas Gruenbachera9a99942011-03-16 01:30:14 +01001442void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd,
1443 struct p_data *dp, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001444{
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001445 if (mdev->tconn->peer_integrity_tfm)
1446 data_size -= crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbachera9a99942011-03-16 01:30:14 +01001447 _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
1448 dp->block_id);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001449}
1450
Andreas Gruenbachera9a99942011-03-16 01:30:14 +01001451void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd,
1452 struct p_block_req *rp)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001453{
Andreas Gruenbachera9a99942011-03-16 01:30:14 +01001454 _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001455}
1456
1457/**
1458 * drbd_send_ack() - Sends an ack packet
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001459 * @mdev: DRBD device
1460 * @cmd: packet command code
1461 * @peer_req: peer request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001462 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001463int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001464 struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465{
Andreas Gruenbacherdd516122011-03-16 15:39:08 +01001466 return _drbd_send_ack(mdev, cmd,
1467 cpu_to_be64(peer_req->i.sector),
1468 cpu_to_be32(peer_req->i.size),
1469 peer_req->block_id);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001470}
1471
1472/* This function misuses the block_id field to signal if the blocks
1473 * are in sync or not. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001474int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475 sector_t sector, int blksize, u64 block_id)
1476{
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001477 return _drbd_send_ack(mdev, cmd,
1478 cpu_to_be64(sector),
1479 cpu_to_be32(blksize),
1480 cpu_to_be64(block_id));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001481}
1482
1483int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
1484 sector_t sector, int size, u64 block_id)
1485{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001486 struct drbd_socket *sock;
1487 struct p_block_req *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001488
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001489 sock = &mdev->tconn->data;
1490 p = drbd_prepare_command(mdev, sock);
1491 if (!p)
1492 return -EIO;
1493 p->sector = cpu_to_be64(sector);
1494 p->block_id = block_id;
1495 p->blksize = cpu_to_be32(size);
1496 return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497}
1498
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001499int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size,
1500 void *digest, int digest_size, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001501{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001502 struct drbd_socket *sock;
1503 struct p_block_req *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001504
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001505 /* FIXME: Put the digest into the preallocated socket buffer. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001506
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001507 sock = &mdev->tconn->data;
1508 p = drbd_prepare_command(mdev, sock);
1509 if (!p)
1510 return -EIO;
1511 p->sector = cpu_to_be64(sector);
1512 p->block_id = ID_SYNCER /* unused */;
1513 p->blksize = cpu_to_be32(size);
1514 return drbd_send_command(mdev, sock, cmd, sizeof(*p),
1515 digest, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001516}
1517
1518int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
1519{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001520 struct drbd_socket *sock;
1521 struct p_block_req *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001522
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001523 sock = &mdev->tconn->data;
1524 p = drbd_prepare_command(mdev, sock);
1525 if (!p)
1526 return -EIO;
1527 p->sector = cpu_to_be64(sector);
1528 p->block_id = ID_SYNCER /* unused */;
1529 p->blksize = cpu_to_be32(size);
1530 return drbd_send_command(mdev, sock, P_OV_REQUEST, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001531}
1532
1533/* called on sndtimeo
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001534 * returns false if we should retry,
1535 * true if we think connection is dead
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536 */
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001537static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001538{
1539 int drop_it;
1540 /* long elapsed = (long)(jiffies - mdev->last_received); */
1541
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001542 drop_it = tconn->meta.socket == sock
1543 || !tconn->asender.task
1544 || get_t_state(&tconn->asender) != RUNNING
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001545 || tconn->cstate < C_WF_REPORT_PARAMS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546
1547 if (drop_it)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001548 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001550 drop_it = !--tconn->ko_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001551 if (!drop_it) {
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001552 conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
1553 current->comm, current->pid, tconn->ko_count);
1554 request_ping(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555 }
1556
1557 return drop_it; /* && (mdev->state == R_PRIMARY) */;
1558}
1559
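/* Flag the connection as congested once more than ~80% of the send buffer
 * is queued (sk_wmem_queued > 4/5 * sk_sndbuf). */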
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001560static void drbd_update_congested(struct drbd_tconn *tconn)
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001561{
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001562 struct sock *sk = tconn->data.socket->sk;
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001563 if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001564 set_bit(NET_CONGESTED, &tconn->flags);
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001565}
1566
Philipp Reisnerb411b362009-09-25 16:07:19 -07001567/* The idea of sendpage seems to be to put some kind of reference
1568 * to the page into the skb, and to hand it over to the NIC. In
1569 * this process get_page() gets called.
1570 *
1571 * As soon as the page was really sent over the network put_page()
1572 * gets called by some part of the network layer. [ NIC driver? ]
1573 *
1574 * [ get_page() / put_page() increment/decrement the count. If count
1575 * reaches 0 the page will be freed. ]
1576 *
1577 * This works nicely with pages from FSs.
1578 * But this means that in protocol A we might signal IO completion too early!
1579 *
1580 * In order not to corrupt data during a resync we must make sure
1581 * that we do not reuse our own buffer pages (EEs) too early, therefore
1582 * we have the net_ee list.
1583 *
1584 * XFS seems to have problems, still, it submits pages with page_count == 0!
1585 * As a workaround, we disable sendpage on pages
1586 * with page_count == 0 or PageSlab.
1587 */
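/* Fallback path: kmap() the page and push it through drbd_send_all() as an
 * ordinary buffer instead of handing it to sendpage(). */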
1588static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
Andreas Gruenbacherb9874272011-03-16 09:41:10 +01001589 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001590{
Andreas Gruenbacherb9874272011-03-16 09:41:10 +01001591 struct socket *socket;
1592 void *addr;
1593 int err;
1594
1595 socket = mdev->tconn->data.socket;
1596 addr = kmap(page) + offset;
1597 err = drbd_send_all(mdev->tconn, socket, addr, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001598 kunmap(page);
Andreas Gruenbacherb9874272011-03-16 09:41:10 +01001599 if (!err)
1600 mdev->send_cnt += size >> 9;
1601 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001602}
1603
1604static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001605 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001606{
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001607 struct socket *socket = mdev->tconn->data.socket;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001608 mm_segment_t oldfs = get_fs();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001609 int len = size;
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001610 int err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001611
1612 /* e.g. XFS meta- & log-data is in slab pages, which have a
1613 * page_count of 0 and/or have PageSlab() set.
1614 * we cannot use send_page for those, as that does get_page();
1615 * put_page(); and would cause either a VM_BUG directly, or
1616 * __page_cache_release a page that would actually still be referenced
1617 * by someone, leading to some obscure delayed Oops somewhere else. */
1618 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001619 return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001620
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001621 msg_flags |= MSG_NOSIGNAL;
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001622 drbd_update_congested(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001623 set_fs(KERNEL_DS);
1624 do {
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001625 int sent;
1626
1627 sent = socket->ops->sendpage(socket, page, offset, len, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628 if (sent <= 0) {
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001629 if (sent == -EAGAIN) {
1630 if (we_should_drop_the_connection(mdev->tconn, socket))
1631 break;
1632 continue;
1633 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001634 dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
1635 __func__, (int)size, len, sent);
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001636 if (sent < 0)
1637 err = sent;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001638 break;
1639 }
1640 len -= sent;
1641 offset += sent;
1642 } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
1643 set_fs(oldfs);
Philipp Reisner01a311a2011-02-07 14:30:33 +01001644 clear_bit(NET_CONGESTED, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001646 if (len == 0) {
1647 err = 0;
1648 mdev->send_cnt += size >> 9;
1649 }
1650 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001651}
1652
1653static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
1654{
1655 struct bio_vec *bvec;
1656 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001657 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001658 __bio_for_each_segment(bvec, bio, i, 0) {
Andreas Gruenbacher7fae55d2011-03-16 11:46:33 +01001659 int err;
1660
1661 err = _drbd_no_send_page(mdev, bvec->bv_page,
1662 bvec->bv_offset, bvec->bv_len,
1663 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
1664 if (err)
1665 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001666 }
Andreas Gruenbacher7fae55d2011-03-16 11:46:33 +01001667 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001668}
1669
1670static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
1671{
1672 struct bio_vec *bvec;
1673 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001674 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675 __bio_for_each_segment(bvec, bio, i, 0) {
Andreas Gruenbacher7fae55d2011-03-16 11:46:33 +01001676 int err;
1677
1678 err = _drbd_send_page(mdev, bvec->bv_page,
1679 bvec->bv_offset, bvec->bv_len,
1680 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
1681 if (err)
1682 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001683 }
Andreas Gruenbacher7fae55d2011-03-16 11:46:33 +01001684 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001685}
1686
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001687static int _drbd_send_zc_ee(struct drbd_conf *mdev,
1688 struct drbd_peer_request *peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001689{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001690 struct page *page = peer_req->pages;
1691 unsigned len = peer_req->i.size;
Andreas Gruenbacher9f692302011-03-16 10:49:09 +01001692 int err;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001693
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001694 /* hint all but last page with MSG_MORE */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001695 page_chain_for_each(page) {
1696 unsigned l = min_t(unsigned, len, PAGE_SIZE);
Andreas Gruenbacher9f692302011-03-16 10:49:09 +01001697
1698 err = _drbd_send_page(mdev, page, 0, l,
1699 page_chain_next(page) ? MSG_MORE : 0);
1700 if (err)
1701 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001702 len -= l;
1703 }
Andreas Gruenbacher9f692302011-03-16 10:49:09 +01001704 return 0;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001705}
1706
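/* Translate bio rw flags into DP_* wire flags; peers before protocol
 * version 95 only understand DP_RW_SYNC. */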
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001707static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
1708{
Philipp Reisner31890f42011-01-19 14:12:51 +01001709 if (mdev->tconn->agreed_pro_version >= 95)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001710 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001711 (bi_rw & REQ_FUA ? DP_FUA : 0) |
1712 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
1713 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
1714 else
Jens Axboe721a9602011-03-09 11:56:30 +01001715 return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001716}
1717
Philipp Reisnerb411b362009-09-25 16:07:19 -07001718/* Used to send write requests
1719 * R_PRIMARY -> Peer (P_DATA)
1720 */
1721int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
1722{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001723 struct drbd_socket *sock;
1724 struct p_data *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001725 unsigned int dp_flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001726 int dgs;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001727 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001728
Philipp Reisner46e1ce42011-05-16 12:57:15 +02001729 sock = &mdev->tconn->data;
1730 p = drbd_prepare_command(mdev, sock);
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001731 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ?
1732 crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001733
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001734 if (!p)
1735 return -EIO;
1736 p->sector = cpu_to_be64(req->i.sector);
1737 p->block_id = (unsigned long)req;
1738 p->seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq));
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001739 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001740 if (mdev->state.conn >= C_SYNC_SOURCE &&
1741 mdev->state.conn <= C_PAUSED_SYNC_T)
1742 dp_flags |= DP_MAY_SET_IN_SYNC;
Philipp Reisner303d1442011-04-13 16:24:47 -07001743 if (mdev->tconn->agreed_pro_version >= 100) {
1744 if (req->rq_state & RQ_EXP_RECEIVE_ACK)
1745 dp_flags |= DP_SEND_RECEIVE_ACK;
1746 if (req->rq_state & RQ_EXP_WRITE_ACK)
1747 dp_flags |= DP_SEND_WRITE_ACK;
1748 }
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001749 p->dp_flags = cpu_to_be32(dp_flags);
1750 if (dgs)
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001751 drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, p + 1);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001752 err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
Andreas Gruenbacher6bdb9b02011-03-16 11:52:58 +01001753 if (!err) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001754 /* For protocol A, we have to memcpy the payload into
1755 * socket buffers, as we may complete right away
1756 * as soon as we handed it over to tcp, at which point the data
1757 * pages may become invalid.
1758 *
1759 * For data-integrity enabled, we copy it as well, so we can be
1760 * sure that even if the bio pages may still be modified, it
1761 * won't change the data on the wire, thus if the digest checks
1762	 * out ok after sending on this side, but does not match on the
1763	 * receiving side, we have certainly detected corruption elsewhere.
1764 */
Philipp Reisner303d1442011-04-13 16:24:47 -07001765 if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || dgs)
Andreas Gruenbacher6bdb9b02011-03-16 11:52:58 +01001766 err = _drbd_send_bio(mdev, req->master_bio);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001767 else
Andreas Gruenbacher6bdb9b02011-03-16 11:52:58 +01001768 err = _drbd_send_zc_bio(mdev, req->master_bio);
Lars Ellenberg470be442010-11-10 10:36:52 +01001769
1770 /* double check digest, sometimes buffers have been modified in flight. */
1771 if (dgs > 0 && dgs <= 64) {
Bart Van Assche24c48302011-05-21 18:32:29 +02001772 /* 64 byte, 512 bit, is the largest digest size
Lars Ellenberg470be442010-11-10 10:36:52 +01001773 * currently supported in kernel crypto. */
1774 unsigned char digest[64];
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001775 drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, digest);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001776 if (memcmp(p + 1, digest, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001777 dev_warn(DEV,
1778 "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
Andreas Gruenbacherace652a2011-01-03 17:09:58 +01001779 (unsigned long long)req->i.sector, req->i.size);
Lars Ellenberg470be442010-11-10 10:36:52 +01001780 }
1781 } /* else if (dgs > 64) {
1782 ... Be noisy about digest too large ...
1783 } */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784 }
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001785 mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02001786
Andreas Gruenbacher6bdb9b02011-03-16 11:52:58 +01001787 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788}
1789
1790/* answer packet, used to send data back for read requests:
1791 * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
1792 * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
1793 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001794int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001795 struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001796{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001797 struct drbd_socket *sock;
1798 struct p_data *p;
Andreas Gruenbacher7b57b89d2011-03-16 11:35:20 +01001799 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800 int dgs;
1801
Philipp Reisner46e1ce42011-05-16 12:57:15 +02001802 sock = &mdev->tconn->data;
1803 p = drbd_prepare_command(mdev, sock);
1804
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001805 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ?
1806 crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001807
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001808 if (!p)
1809 return -EIO;
1810 p->sector = cpu_to_be64(peer_req->i.sector);
1811 p->block_id = peer_req->block_id;
1812 p->seq_num = 0; /* unused */
1813 if (dgs)
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001814 drbd_csum_ee(mdev, mdev->tconn->integrity_tfm, peer_req, p + 1);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001815 err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size);
Andreas Gruenbacher7b57b89d2011-03-16 11:35:20 +01001816 if (!err)
1817 err = _drbd_send_zc_ee(mdev, peer_req);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001818 mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02001819
Andreas Gruenbacher7b57b89d2011-03-16 11:35:20 +01001820 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001821}
1822
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001823int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001824{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001825 struct drbd_socket *sock;
1826 struct p_block_desc *p;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001827
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001828 sock = &mdev->tconn->data;
1829 p = drbd_prepare_command(mdev, sock);
1830 if (!p)
1831 return -EIO;
1832 p->sector = cpu_to_be64(req->i.sector);
1833 p->blksize = cpu_to_be32(req->i.size);
1834 return drbd_send_command(mdev, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001835}
1836
Philipp Reisnerb411b362009-09-25 16:07:19 -07001837/*
1838 drbd_send distinguishes two cases:
1839
1840 Packets sent via the data socket "sock"
1841 and packets sent via the meta data socket "msock"
1842
1843                     sock                      msock
1844  -----------------+-------------------------+------------------------------
1845  timeout           conf.timeout / 2          conf.timeout / 2
1846  timeout action    send a ping via msock     Abort communication
1847                                              and close all sockets
1848*/
1849
1850/*
1851 * you must have down()ed the appropriate [m]sock_mutex elsewhere!
1852 */
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001853int drbd_send(struct drbd_tconn *tconn, struct socket *sock,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001854 void *buf, size_t size, unsigned msg_flags)
1855{
1856 struct kvec iov;
1857 struct msghdr msg;
1858 int rv, sent = 0;
1859
1860 if (!sock)
Andreas Gruenbacherc0d42c82010-12-09 23:52:22 +01001861 return -EBADR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001862
1863 /* THINK if (signal_pending) return ... ? */
1864
1865 iov.iov_base = buf;
1866 iov.iov_len = size;
1867
1868 msg.msg_name = NULL;
1869 msg.msg_namelen = 0;
1870 msg.msg_control = NULL;
1871 msg.msg_controllen = 0;
1872 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
1873
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001874 if (sock == tconn->data.socket) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02001875 rcu_read_lock();
1876 tconn->ko_count = rcu_dereference(tconn->net_conf)->ko_count;
1877 rcu_read_unlock();
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001878 drbd_update_congested(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001879 }
1880 do {
1881 /* STRANGE
1882 * tcp_sendmsg does _not_ use its size parameter at all ?
1883 *
1884 * -EAGAIN on timeout, -EINTR on signal.
1885 */
1886/* THINK
1887 * do we need to block DRBD_SIG if sock == &meta.socket ??
1888 * otherwise wake_asender() might interrupt some send_*Ack !
1889 */
1890 rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
1891 if (rv == -EAGAIN) {
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001892 if (we_should_drop_the_connection(tconn, sock))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001893 break;
1894 else
1895 continue;
1896 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001897 if (rv == -EINTR) {
1898 flush_signals(current);
1899 rv = 0;
1900 }
1901 if (rv < 0)
1902 break;
1903 sent += rv;
1904 iov.iov_base += rv;
1905 iov.iov_len -= rv;
1906 } while (sent < size);
1907
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001908 if (sock == tconn->data.socket)
1909 clear_bit(NET_CONGESTED, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001910
1911 if (rv <= 0) {
1912 if (rv != -EAGAIN) {
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001913 conn_err(tconn, "%s_sendmsg returned %d\n",
1914 sock == tconn->meta.socket ? "msock" : "sock",
1915 rv);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001916 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001917 } else
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001918 conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001919 }
1920
1921 return sent;
1922}
1923
Andreas Gruenbacherfb708e42010-12-15 17:04:36 +01001924/**
1925 * drbd_send_all - Send an entire buffer
1926 *
1927 * Returns 0 upon success and a negative error value otherwise.
1928 */
1929int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer,
1930 size_t size, unsigned msg_flags)
1931{
1932 int err;
1933
1934 err = drbd_send(tconn, sock, buffer, size, msg_flags);
1935 if (err < 0)
1936 return err;
1937 if (err != size)
1938 return -EIO;
1939 return 0;
1940}
1941
Philipp Reisnerb411b362009-09-25 16:07:19 -07001942static int drbd_open(struct block_device *bdev, fmode_t mode)
1943{
1944 struct drbd_conf *mdev = bdev->bd_disk->private_data;
1945 unsigned long flags;
1946 int rv = 0;
1947
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001948 mutex_lock(&drbd_main_mutex);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001949 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001950 /* to have a stable mdev->state.role
1951 * and no race with updating open_cnt */
1952
1953 if (mdev->state.role != R_PRIMARY) {
1954 if (mode & FMODE_WRITE)
1955 rv = -EROFS;
1956 else if (!allow_oos)
1957 rv = -EMEDIUMTYPE;
1958 }
1959
1960 if (!rv)
1961 mdev->open_cnt++;
Philipp Reisner87eeee42011-01-19 14:16:30 +01001962 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001963 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001964
1965 return rv;
1966}
1967
1968static int drbd_release(struct gendisk *gd, fmode_t mode)
1969{
1970 struct drbd_conf *mdev = gd->private_data;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001971 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972 mdev->open_cnt--;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001973 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001974 return 0;
1975}
1976
Philipp Reisnerb411b362009-09-25 16:07:19 -07001977static void drbd_set_defaults(struct drbd_conf *mdev)
1978{
Lars Ellenbergf3990022011-03-23 14:31:09 +01001979 /* Beware! The actual layout differs
1980 * between big endian and little endian */
Philipp Reisnerda9fbc22011-03-29 10:52:01 +02001981 mdev->state = (union drbd_dev_state) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001982 { .role = R_SECONDARY,
1983 .peer = R_UNKNOWN,
1984 .conn = C_STANDALONE,
1985 .disk = D_DISKLESS,
1986 .pdsk = D_UNKNOWN,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001987 } };
1988}
1989
1990void drbd_init_set_defaults(struct drbd_conf *mdev)
1991{
1992 /* the memset(,0,) did most of this.
1993 * note: only assignments, no allocation in here */
1994
1995 drbd_set_defaults(mdev);
1996
Philipp Reisnerb411b362009-09-25 16:07:19 -07001997 atomic_set(&mdev->ap_bio_cnt, 0);
1998 atomic_set(&mdev->ap_pending_cnt, 0);
1999 atomic_set(&mdev->rs_pending_cnt, 0);
2000 atomic_set(&mdev->unacked_cnt, 0);
2001 atomic_set(&mdev->local_cnt, 0);
Lars Ellenberg435f0742010-09-06 12:30:25 +02002002 atomic_set(&mdev->pp_in_use_by_net, 0);
Philipp Reisner778f2712010-07-06 11:14:00 +02002003 atomic_set(&mdev->rs_sect_in, 0);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002004 atomic_set(&mdev->rs_sect_ev, 0);
Philipp Reisner759fbdf2010-10-26 16:02:27 +02002005 atomic_set(&mdev->ap_in_flight, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002006
2007 mutex_init(&mdev->md_io_mutex);
Philipp Reisner8410da8f02011-02-11 20:11:10 +01002008 mutex_init(&mdev->own_state_mutex);
2009 mdev->state_mutex = &mdev->own_state_mutex;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002010
Philipp Reisnerb411b362009-09-25 16:07:19 -07002011 spin_lock_init(&mdev->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002012 spin_lock_init(&mdev->peer_seq_lock);
2013 spin_lock_init(&mdev->epoch_lock);
2014
2015 INIT_LIST_HEAD(&mdev->active_ee);
2016 INIT_LIST_HEAD(&mdev->sync_ee);
2017 INIT_LIST_HEAD(&mdev->done_ee);
2018 INIT_LIST_HEAD(&mdev->read_ee);
2019 INIT_LIST_HEAD(&mdev->net_ee);
2020 INIT_LIST_HEAD(&mdev->resync_reads);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021 INIT_LIST_HEAD(&mdev->resync_work.list);
2022 INIT_LIST_HEAD(&mdev->unplug_work.list);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002023 INIT_LIST_HEAD(&mdev->go_diskless.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024 INIT_LIST_HEAD(&mdev->md_sync_work.list);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02002025 INIT_LIST_HEAD(&mdev->start_resync_work.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002026 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
Philipp Reisner0ced55a2010-04-30 15:26:20 +02002027
Philipp Reisner794abb72010-12-27 11:51:23 +01002028 mdev->resync_work.cb = w_resync_timer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002029 mdev->unplug_work.cb = w_send_write_hint;
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002030 mdev->go_diskless.cb = w_go_diskless;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002031 mdev->md_sync_work.cb = w_md_sync;
2032 mdev->bm_io_work.w.cb = w_bitmap_io;
Philipp Reisner370a43e2011-01-14 16:03:11 +01002033 mdev->start_resync_work.cb = w_start_resync;
Philipp Reisnera21e9292011-02-08 15:08:49 +01002034
2035 mdev->resync_work.mdev = mdev;
2036 mdev->unplug_work.mdev = mdev;
2037 mdev->go_diskless.mdev = mdev;
2038 mdev->md_sync_work.mdev = mdev;
2039 mdev->bm_io_work.w.mdev = mdev;
2040 mdev->start_resync_work.mdev = mdev;
2041
Philipp Reisnerb411b362009-09-25 16:07:19 -07002042 init_timer(&mdev->resync_timer);
2043 init_timer(&mdev->md_sync_timer);
Philipp Reisner370a43e2011-01-14 16:03:11 +01002044 init_timer(&mdev->start_resync_timer);
Philipp Reisner7fde2be2011-03-01 11:08:28 +01002045 init_timer(&mdev->request_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002046 mdev->resync_timer.function = resync_timer_fn;
2047 mdev->resync_timer.data = (unsigned long) mdev;
2048 mdev->md_sync_timer.function = md_sync_timer_fn;
2049 mdev->md_sync_timer.data = (unsigned long) mdev;
Philipp Reisner370a43e2011-01-14 16:03:11 +01002050 mdev->start_resync_timer.function = start_resync_timer_fn;
2051 mdev->start_resync_timer.data = (unsigned long) mdev;
Philipp Reisner7fde2be2011-03-01 11:08:28 +01002052 mdev->request_timer.function = request_timer_fn;
2053 mdev->request_timer.data = (unsigned long) mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002054
2055 init_waitqueue_head(&mdev->misc_wait);
2056 init_waitqueue_head(&mdev->state_wait);
2057 init_waitqueue_head(&mdev->ee_wait);
2058 init_waitqueue_head(&mdev->al_wait);
2059 init_waitqueue_head(&mdev->seq_wait);
2060
Philipp Reisner2451fc32010-08-24 13:43:11 +02002061 mdev->write_ordering = WO_bdev_flush;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002062 mdev->resync_wenr = LC_FREE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02002063 mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
2064 mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002065}
2066
2067void drbd_mdev_cleanup(struct drbd_conf *mdev)
2068{
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02002069 int i;
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01002070 if (mdev->tconn->receiver.t_state != NONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002071 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01002072 mdev->tconn->receiver.t_state);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002073
2074 /* no need to lock it, I'm the only thread alive */
2075 if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
2076 dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
2077 mdev->al_writ_cnt =
2078 mdev->bm_writ_cnt =
2079 mdev->read_cnt =
2080 mdev->recv_cnt =
2081 mdev->send_cnt =
2082 mdev->writ_cnt =
2083 mdev->p_size =
2084 mdev->rs_start =
2085 mdev->rs_total =
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02002086 mdev->rs_failed = 0;
2087 mdev->rs_last_events = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002088 mdev->rs_last_sect_ev = 0;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02002089 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2090 mdev->rs_mark_left[i] = 0;
2091 mdev->rs_mark_time[i] = 0;
2092 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002093 D_ASSERT(mdev->tconn->net_conf == NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002094
2095 drbd_set_my_capacity(mdev, 0);
2096 if (mdev->bitmap) {
2097 /* maybe never allocated. */
Philipp Reisner02d9a942010-03-24 16:23:03 +01002098 drbd_bm_resize(mdev, 0, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002099 drbd_bm_cleanup(mdev);
2100 }
2101
Philipp Reisner1d041222011-04-22 15:20:23 +02002102 drbd_free_bc(mdev->ldev);
2103 mdev->ldev = NULL;
2104
Philipp Reisner07782862010-08-31 12:00:50 +02002105 clear_bit(AL_SUSPENDED, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002106
Philipp Reisnerb411b362009-09-25 16:07:19 -07002107 D_ASSERT(list_empty(&mdev->active_ee));
2108 D_ASSERT(list_empty(&mdev->sync_ee));
2109 D_ASSERT(list_empty(&mdev->done_ee));
2110 D_ASSERT(list_empty(&mdev->read_ee));
2111 D_ASSERT(list_empty(&mdev->net_ee));
2112 D_ASSERT(list_empty(&mdev->resync_reads));
Philipp Reisnere42325a2011-01-19 13:55:45 +01002113 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
2114 D_ASSERT(list_empty(&mdev->tconn->meta.work.q));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002115 D_ASSERT(list_empty(&mdev->resync_work.list));
2116 D_ASSERT(list_empty(&mdev->unplug_work.list));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002117 D_ASSERT(list_empty(&mdev->go_diskless.list));
Lars Ellenberg2265b472010-12-16 15:41:26 +01002118
2119 drbd_set_defaults(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002120}
2121
2122
2123static void drbd_destroy_mempools(void)
2124{
2125 struct page *page;
2126
2127 while (drbd_pp_pool) {
2128 page = drbd_pp_pool;
2129 drbd_pp_pool = (struct page *)page_private(page);
2130 __free_page(page);
2131 drbd_pp_vacant--;
2132 }
2133
2134 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
2135
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002136 if (drbd_md_io_bio_set)
2137 bioset_free(drbd_md_io_bio_set);
Lars Ellenberg35abf592011-02-23 12:39:46 +01002138 if (drbd_md_io_page_pool)
2139 mempool_destroy(drbd_md_io_page_pool);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002140 if (drbd_ee_mempool)
2141 mempool_destroy(drbd_ee_mempool);
2142 if (drbd_request_mempool)
2143 mempool_destroy(drbd_request_mempool);
2144 if (drbd_ee_cache)
2145 kmem_cache_destroy(drbd_ee_cache);
2146 if (drbd_request_cache)
2147 kmem_cache_destroy(drbd_request_cache);
2148 if (drbd_bm_ext_cache)
2149 kmem_cache_destroy(drbd_bm_ext_cache);
2150 if (drbd_al_ext_cache)
2151 kmem_cache_destroy(drbd_al_ext_cache);
2152
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002153 drbd_md_io_bio_set = NULL;
Lars Ellenberg35abf592011-02-23 12:39:46 +01002154 drbd_md_io_page_pool = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002155 drbd_ee_mempool = NULL;
2156 drbd_request_mempool = NULL;
2157 drbd_ee_cache = NULL;
2158 drbd_request_cache = NULL;
2159 drbd_bm_ext_cache = NULL;
2160 drbd_al_ext_cache = NULL;
2161
2162 return;
2163}
2164
2165static int drbd_create_mempools(void)
2166{
2167 struct page *page;
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002168 const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002169 int i;
2170
2171 /* prepare our caches and mempools */
2172 drbd_request_mempool = NULL;
2173 drbd_ee_cache = NULL;
2174 drbd_request_cache = NULL;
2175 drbd_bm_ext_cache = NULL;
2176 drbd_al_ext_cache = NULL;
2177 drbd_pp_pool = NULL;
Lars Ellenberg35abf592011-02-23 12:39:46 +01002178 drbd_md_io_page_pool = NULL;
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002179 drbd_md_io_bio_set = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002180
2181 /* caches */
2182 drbd_request_cache = kmem_cache_create(
2183 "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
2184 if (drbd_request_cache == NULL)
2185 goto Enomem;
2186
2187 drbd_ee_cache = kmem_cache_create(
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01002188 "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002189 if (drbd_ee_cache == NULL)
2190 goto Enomem;
2191
2192 drbd_bm_ext_cache = kmem_cache_create(
2193 "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
2194 if (drbd_bm_ext_cache == NULL)
2195 goto Enomem;
2196
2197 drbd_al_ext_cache = kmem_cache_create(
2198 "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
2199 if (drbd_al_ext_cache == NULL)
2200 goto Enomem;
2201
2202 /* mempools */
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002203 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
2204 if (drbd_md_io_bio_set == NULL)
2205 goto Enomem;
2206
Lars Ellenberg35abf592011-02-23 12:39:46 +01002207 drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
2208 if (drbd_md_io_page_pool == NULL)
2209 goto Enomem;
2210
Philipp Reisnerb411b362009-09-25 16:07:19 -07002211 drbd_request_mempool = mempool_create(number,
2212 mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
2213 if (drbd_request_mempool == NULL)
2214 goto Enomem;
2215
2216 drbd_ee_mempool = mempool_create(number,
2217 mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
Nicolas Kaiser2027ae12010-10-28 06:15:26 -06002218 if (drbd_ee_mempool == NULL)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002219 goto Enomem;
2220
2221 /* drbd's page pool */
2222 spin_lock_init(&drbd_pp_lock);
2223
2224 for (i = 0; i < number; i++) {
2225 page = alloc_page(GFP_HIGHUSER);
2226 if (!page)
2227 goto Enomem;
2228 set_page_private(page, (unsigned long)drbd_pp_pool);
2229 drbd_pp_pool = page;
2230 }
2231 drbd_pp_vacant = number;
2232
2233 return 0;
2234
2235Enomem:
2236 drbd_destroy_mempools(); /* in case we allocated some */
2237 return -ENOMEM;
2238}
2239
2240static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
2241 void *unused)
2242{
2243 /* just so we have it. you never know what interesting things we
2244 * might want to do here some day...
2245 */
2246
2247 return NOTIFY_DONE;
2248}
2249
2250static struct notifier_block drbd_notifier = {
2251 .notifier_call = drbd_notify_sys,
2252};
2253
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002254static void drbd_release_all_peer_reqs(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002255{
2256 int rr;
2257
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002258 rr = drbd_free_peer_reqs(mdev, &mdev->active_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002259 if (rr)
2260 dev_err(DEV, "%d EEs in active list found!\n", rr);
2261
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002262 rr = drbd_free_peer_reqs(mdev, &mdev->sync_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002263 if (rr)
2264 dev_err(DEV, "%d EEs in sync list found!\n", rr);
2265
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002266 rr = drbd_free_peer_reqs(mdev, &mdev->read_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002267 if (rr)
2268 dev_err(DEV, "%d EEs in read list found!\n", rr);
2269
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002270 rr = drbd_free_peer_reqs(mdev, &mdev->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002271 if (rr)
2272 dev_err(DEV, "%d EEs in done list found!\n", rr);
2273
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002274 rr = drbd_free_peer_reqs(mdev, &mdev->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002275 if (rr)
2276 dev_err(DEV, "%d EEs in net list found!\n", rr);
2277}
2278
Philipp Reisner774b3052011-02-22 02:07:03 -05002279/* caution. no locking. */
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002280void drbd_minor_destroy(struct kref *kref)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002281{
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002282 struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref);
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002283 struct drbd_tconn *tconn = mdev->tconn;
2284
Philipp Reisnerb411b362009-09-25 16:07:19 -07002285 /* paranoia asserts */
Andreas Gruenbacher70dc65e2010-12-21 14:46:57 +01002286 D_ASSERT(mdev->open_cnt == 0);
Philipp Reisnere42325a2011-01-19 13:55:45 +01002287 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288 /* end paranoia asserts */
2289
Philipp Reisnerb411b362009-09-25 16:07:19 -07002290 /* cleanup stuff that may have been allocated during
2291 * device (re-)configuration or state changes */
2292
2293 if (mdev->this_bdev)
2294 bdput(mdev->this_bdev);
2295
Philipp Reisner1d041222011-04-22 15:20:23 +02002296 drbd_free_bc(mdev->ldev);
2297 mdev->ldev = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002298
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002299 drbd_release_all_peer_reqs(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002300
Philipp Reisnerb411b362009-09-25 16:07:19 -07002301 lc_destroy(mdev->act_log);
2302 lc_destroy(mdev->resync);
2303
2304 kfree(mdev->p_uuid);
2305 /* mdev->p_uuid = NULL; */
2306
Philipp Reisnercd1d9952011-04-11 21:24:24 -07002307 kfree(mdev->current_epoch);
2308 if (mdev->bitmap) /* should no longer be there. */
2309 drbd_bm_cleanup(mdev);
2310 __free_page(mdev->md_io_page);
2311 put_disk(mdev->vdisk);
2312 blk_cleanup_queue(mdev->rq_queue);
Philipp Reisner9958c852011-05-03 16:19:31 +02002313 kfree(mdev->rs_plan_s);
Philipp Reisnercd1d9952011-04-11 21:24:24 -07002314 kfree(mdev);
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002315
2316 kref_put(&tconn->kref, &conn_destroy);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002317}
2318
2319static void drbd_cleanup(void)
2320{
2321 unsigned int i;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002322 struct drbd_conf *mdev;
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002323 struct drbd_tconn *tconn, *tmp;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002324
2325 unregister_reboot_notifier(&drbd_notifier);
2326
Lars Ellenberg17a93f32010-11-24 10:37:35 +01002327 /* first remove proc,
2328	 * drbdsetup uses its presence to detect
2329 * whether DRBD is loaded.
2330 * If we would get stuck in proc removal,
2331 * but have netlink already deregistered,
2332 * some drbdsetup commands may wait forever
2333 * for an answer.
2334 */
2335 if (drbd_proc)
2336 remove_proc_entry("drbd", NULL);
2337
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002338 drbd_genl_unregister();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002339
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002340 idr_for_each_entry(&minors, mdev, i) {
2341 idr_remove(&minors, mdev_to_minor(mdev));
2342 idr_remove(&mdev->tconn->volumes, mdev->vnr);
2343 del_gendisk(mdev->vdisk);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002344 /* synchronize_rcu(); No other threads running at this point */
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002345 kref_put(&mdev->kref, &drbd_minor_destroy);
2346 }
2347
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002348	/* not _rcu since no other updater remains. Genl already unregistered */
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002349 list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002350		list_del(&tconn->all_tconn); /* not _rcu: no proc, no other threads */
2351 /* synchronize_rcu(); */
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002352 kref_put(&tconn->kref, &conn_destroy);
2353 }
Philipp Reisnerff370e52011-04-11 21:10:11 -07002354
Philipp Reisner81a5d602011-02-22 19:53:16 -05002355 drbd_destroy_mempools();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002356 unregister_blkdev(DRBD_MAJOR, "drbd");
2357
Philipp Reisner81a5d602011-02-22 19:53:16 -05002358 idr_destroy(&minors);
2359
Philipp Reisnerb411b362009-09-25 16:07:19 -07002360 printk(KERN_INFO "drbd: module cleanup done.\n");
2361}
2362
2363/**
2364 * drbd_congested() - Callback for pdflush
2365 * @congested_data: User data
2366 * @bdi_bits: Bits pdflush is currently interested in
2367 *
2368 * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
2369 */
2370static int drbd_congested(void *congested_data, int bdi_bits)
2371{
2372 struct drbd_conf *mdev = congested_data;
2373 struct request_queue *q;
2374 char reason = '-';
2375 int r = 0;
2376
Andreas Gruenbacher1b881ef2010-12-13 18:03:38 +01002377 if (!may_inc_ap_bio(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002378 /* DRBD has frozen IO */
2379 r = bdi_bits;
2380 reason = 'd';
2381 goto out;
2382 }
2383
2384 if (get_ldev(mdev)) {
2385 q = bdev_get_queue(mdev->ldev->backing_bdev);
2386 r = bdi_congested(&q->backing_dev_info, bdi_bits);
2387 put_ldev(mdev);
2388 if (r)
2389 reason = 'b';
2390 }
2391
Philipp Reisner01a311a2011-02-07 14:30:33 +01002392 if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002393 r |= (1 << BDI_async_congested);
2394 reason = reason == 'b' ? 'a' : 'n';
2395 }
2396
2397out:
2398 mdev->congestion_reason = reason;
2399 return r;
2400}
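/*
 * The single character stored in mdev->congestion_reason above decodes
 * as: 'd' DRBD itself has frozen application IO, 'b' the local backing
 * device is congested, 'a' backing device and network are congested,
 * 'n' only the network is congested, '-' not congested at all.
 */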
2401
Philipp Reisner6699b652011-02-09 11:10:24 +01002402static void drbd_init_workqueue(struct drbd_work_queue* wq)
2403{
2404 sema_init(&wq->s, 0);
2405 spin_lock_init(&wq->q_lock);
2406 INIT_LIST_HEAD(&wq->q);
2407}
2408
Philipp Reisner0ace9df2011-04-24 10:53:19 +02002409struct drbd_tconn *conn_get_by_name(const char *name)
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002410{
2411 struct drbd_tconn *tconn;
2412
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002413 if (!name || !name[0])
2414 return NULL;
2415
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002416 rcu_read_lock();
Philipp Reisnerec0bddb2011-05-04 15:47:01 +02002417 list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
Philipp Reisner0ace9df2011-04-24 10:53:19 +02002418 if (!strcmp(tconn->name, name)) {
2419 kref_get(&tconn->kref);
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002420 goto found;
Philipp Reisner0ace9df2011-04-24 10:53:19 +02002421 }
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002422 }
2423 tconn = NULL;
2424found:
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002425 rcu_read_unlock();
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002426 return tconn;
2427}
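/*
 * Illustrative usage only (no such caller in this file); the reference
 * taken by conn_get_by_name() has to be dropped again by the caller:
 *
 *	struct drbd_tconn *tconn = conn_get_by_name("some-resource");
 *	if (tconn) {
 *		... use tconn ...
 *		kref_put(&tconn->kref, &conn_destroy);
 *	}
 *
 * "some-resource" is just a placeholder name for this sketch.
 */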
2428
Andreas Gruenbacher089c0752011-06-14 18:28:09 +02002429struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
2430 void *peer_addr, int peer_addr_len)
2431{
2432 struct drbd_tconn *tconn;
2433
2434 rcu_read_lock();
2435 list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
2436 if (tconn->my_addr_len == my_addr_len &&
2437 tconn->peer_addr_len == peer_addr_len &&
2438 !memcmp(&tconn->my_addr, my_addr, my_addr_len) &&
2439 !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) {
2440 kref_get(&tconn->kref);
2441 goto found;
2442 }
2443 }
2444 tconn = NULL;
2445found:
2446 rcu_read_unlock();
2447 return tconn;
2448}
2449
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002450static int drbd_alloc_socket(struct drbd_socket *socket)
2451{
2452 socket->rbuf = (void *) __get_free_page(GFP_KERNEL);
2453 if (!socket->rbuf)
2454 return -ENOMEM;
Andreas Gruenbacher5a87d922011-03-24 21:17:52 +01002455 socket->sbuf = (void *) __get_free_page(GFP_KERNEL);
2456 if (!socket->sbuf)
2457 return -ENOMEM;
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002458 return 0;
2459}
2460
2461static void drbd_free_socket(struct drbd_socket *socket)
2462{
Andreas Gruenbacher5a87d922011-03-24 21:17:52 +01002463 free_page((unsigned long) socket->sbuf);
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002464 free_page((unsigned long) socket->rbuf);
2465}
2466
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002467void conn_free_crypto(struct drbd_tconn *tconn)
2468{
Philipp Reisner1d041222011-04-22 15:20:23 +02002469 drbd_free_sock(tconn);
2470
2471 crypto_free_hash(tconn->csums_tfm);
2472 crypto_free_hash(tconn->verify_tfm);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002473 crypto_free_hash(tconn->cram_hmac_tfm);
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02002474 crypto_free_hash(tconn->integrity_tfm);
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02002475 crypto_free_hash(tconn->peer_integrity_tfm);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002476 kfree(tconn->int_dig_in);
2477 kfree(tconn->int_dig_vv);
Philipp Reisner1d041222011-04-22 15:20:23 +02002478
2479 tconn->csums_tfm = NULL;
2480 tconn->verify_tfm = NULL;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002481 tconn->cram_hmac_tfm = NULL;
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02002482 tconn->integrity_tfm = NULL;
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02002483 tconn->peer_integrity_tfm = NULL;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002484 tconn->int_dig_in = NULL;
2485 tconn->int_dig_vv = NULL;
2486}
2487
Andreas Gruenbacherafbbfa82011-06-16 17:58:02 +02002488int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts)
2489{
2490 cpumask_var_t new_cpu_mask;
2491 int err;
2492
2493 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL))
2494 return -ENOMEM;
2495 /*
2496 retcode = ERR_NOMEM;
2497 drbd_msg_put_info("unable to allocate cpumask");
2498 */
2499
2500 /* silently ignore cpu mask on UP kernel */
2501 if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
2502 /* FIXME: Get rid of constant 32 here */
2503 err = __bitmap_parse(res_opts->cpu_mask, 32, 0,
2504 cpumask_bits(new_cpu_mask), nr_cpu_ids);
2505 if (err) {
2506 conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
2507 /* retcode = ERR_CPU_MASK_PARSE; */
2508 goto fail;
2509 }
2510 }
2511 tconn->res_opts = *res_opts;
2512 if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
2513 cpumask_copy(tconn->cpu_mask, new_cpu_mask);
2514 drbd_calc_cpu_mask(tconn);
2515 tconn->receiver.reset_cpu_mask = 1;
2516 tconn->asender.reset_cpu_mask = 1;
2517 tconn->worker.reset_cpu_mask = 1;
2518 }
2519 err = 0;
2520
2521fail:
2522 free_cpumask_var(new_cpu_mask);
2523 return err;
2524
2525}
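/*
 * res_opts->cpu_mask is a hex string as accepted by __bitmap_parse()
 * (and silently ignored on UP kernels).  When the effective mask
 * changes, the receiver, asender and worker threads are merely flagged
 * via reset_cpu_mask; presumably they re-apply their CPU affinity the
 * next time they pass through their main loop.
 */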
2526
Philipp Reisnerec0bddb2011-05-04 15:47:01 +02002527/* caller must be under genl_lock() */
Andreas Gruenbacherafbbfa82011-06-16 17:58:02 +02002528struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts)
Philipp Reisner21114382011-01-19 12:26:59 +01002529{
2530 struct drbd_tconn *tconn;
2531
2532 tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL);
2533 if (!tconn)
2534 return NULL;
2535
2536 tconn->name = kstrdup(name, GFP_KERNEL);
2537 if (!tconn->name)
2538 goto fail;
2539
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002540 if (drbd_alloc_socket(&tconn->data))
2541 goto fail;
2542 if (drbd_alloc_socket(&tconn->meta))
2543 goto fail;
2544
Philipp Reisner774b3052011-02-22 02:07:03 -05002545 if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
2546 goto fail;
2547
Andreas Gruenbacherafbbfa82011-06-16 17:58:02 +02002548 if (set_resource_options(tconn, res_opts))
2549 goto fail;
2550
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01002551 if (!tl_init(tconn))
2552 goto fail;
2553
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01002554 tconn->cstate = C_STANDALONE;
Philipp Reisner8410da8f02011-02-11 20:11:10 +01002555 mutex_init(&tconn->cstate_mutex);
Philipp Reisner6699b652011-02-09 11:10:24 +01002556 spin_lock_init(&tconn->req_lock);
Philipp Reisnera0095502011-05-03 13:14:15 +02002557 mutex_init(&tconn->conf_update);
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01002558 init_waitqueue_head(&tconn->ping_wait);
Philipp Reisner062e8792011-02-08 11:09:18 +01002559 idr_init(&tconn->volumes);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01002560
Philipp Reisner6699b652011-02-09 11:10:24 +01002561 drbd_init_workqueue(&tconn->data.work);
2562 mutex_init(&tconn->data.mutex);
2563
2564 drbd_init_workqueue(&tconn->meta.work);
2565 mutex_init(&tconn->meta.mutex);
2566
Philipp Reisner392c8802011-02-09 10:33:31 +01002567 drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver");
2568 drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
2569 drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
2570
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002571 kref_init(&tconn->kref);
Philipp Reisnerec0bddb2011-05-04 15:47:01 +02002572 list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns);
Philipp Reisner21114382011-01-19 12:26:59 +01002573
2574 return tconn;
2575
2576fail:
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01002577 tl_cleanup(tconn);
Philipp Reisner774b3052011-02-22 02:07:03 -05002578 free_cpumask_var(tconn->cpu_mask);
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002579 drbd_free_socket(&tconn->meta);
2580 drbd_free_socket(&tconn->data);
Philipp Reisner21114382011-01-19 12:26:59 +01002581 kfree(tconn->name);
2582 kfree(tconn);
2583
2584 return NULL;
2585}
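/*
 * Lifecycle sketch: conn_create() returns with tconn->kref at 1 and the
 * connection already visible on the global drbd_tconns list (hence the
 * genl_lock() requirement above).  The last kref_put(&tconn->kref,
 * &conn_destroy) frees it again; conn_new_minor() below takes one
 * additional reference per volume.
 */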
2586
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002587void conn_destroy(struct kref *kref)
Philipp Reisner21114382011-01-19 12:26:59 +01002588{
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002589 struct drbd_tconn *tconn = container_of(kref, struct drbd_tconn, kref);
2590
Philipp Reisner062e8792011-02-08 11:09:18 +01002591 idr_destroy(&tconn->volumes);
Philipp Reisner21114382011-01-19 12:26:59 +01002592
Philipp Reisner774b3052011-02-22 02:07:03 -05002593 free_cpumask_var(tconn->cpu_mask);
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002594 drbd_free_socket(&tconn->meta);
2595 drbd_free_socket(&tconn->data);
Philipp Reisner21114382011-01-19 12:26:59 +01002596 kfree(tconn->name);
Philipp Reisnerb42a70a2011-01-27 10:55:20 +01002597 kfree(tconn->int_dig_in);
2598 kfree(tconn->int_dig_vv);
Philipp Reisner21114382011-01-19 12:26:59 +01002599 kfree(tconn);
2600}
2601
Philipp Reisner774b3052011-02-22 02:07:03 -05002602enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002603{
2604 struct drbd_conf *mdev;
2605 struct gendisk *disk;
2606 struct request_queue *q;
Philipp Reisner774b3052011-02-22 02:07:03 -05002607 int vnr_got = vnr;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002608 int minor_got = minor;
Lars Ellenberg8432b312011-03-08 16:11:16 +01002609 enum drbd_ret_code err = ERR_NOMEM;
Philipp Reisner774b3052011-02-22 02:07:03 -05002610
2611 mdev = minor_to_mdev(minor);
2612 if (mdev)
2613 return ERR_MINOR_EXISTS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002614
2615 /* GFP_KERNEL, we are outside of all write-out paths */
2616 mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
2617 if (!mdev)
Philipp Reisner774b3052011-02-22 02:07:03 -05002618 return ERR_NOMEM;
2619
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002620 kref_get(&tconn->kref);
Philipp Reisner774b3052011-02-22 02:07:03 -05002621 mdev->tconn = tconn;
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002622
Philipp Reisnerb411b362009-09-25 16:07:19 -07002623 mdev->minor = minor;
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002624 mdev->vnr = vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002625
2626 drbd_init_set_defaults(mdev);
2627
2628 q = blk_alloc_queue(GFP_KERNEL);
2629 if (!q)
2630 goto out_no_q;
2631 mdev->rq_queue = q;
2632 q->queuedata = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002633
2634 disk = alloc_disk(1);
2635 if (!disk)
2636 goto out_no_disk;
2637 mdev->vdisk = disk;
2638
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002639 set_disk_ro(disk, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002640
2641 disk->queue = q;
2642 disk->major = DRBD_MAJOR;
2643 disk->first_minor = minor;
2644 disk->fops = &drbd_ops;
2645 sprintf(disk->disk_name, "drbd%d", minor);
2646 disk->private_data = mdev;
2647
2648 mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
2649 /* we have no partitions. we contain only ourselves. */
2650 mdev->this_bdev->bd_contains = mdev->this_bdev;
2651
2652 q->backing_dev_info.congested_fn = drbd_congested;
2653 q->backing_dev_info.congested_data = mdev;
2654
Andreas Gruenbacher2f58dcf2010-12-13 17:48:19 +01002655 blk_queue_make_request(q, drbd_make_request);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002656	/* Setting the max_hw_sectors to an odd value of 8 KiB here
2657	   triggers a max_bio_size message upon first attach or connect */
2658 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002659 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
2660 blk_queue_merge_bvec(q, drbd_merge_bvec);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002661 q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002662
2663 mdev->md_io_page = alloc_page(GFP_KERNEL);
2664 if (!mdev->md_io_page)
2665 goto out_no_io_page;
2666
2667 if (drbd_bm_init(mdev))
2668 goto out_no_bitmap;
Andreas Gruenbacherdac13892011-01-21 17:18:39 +01002669 mdev->read_requests = RB_ROOT;
Andreas Gruenbacherde696712011-01-20 15:00:24 +01002670 mdev->write_requests = RB_ROOT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002671
Philipp Reisnerb411b362009-09-25 16:07:19 -07002672 mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
2673 if (!mdev->current_epoch)
2674 goto out_no_epoch;
2675
2676 INIT_LIST_HEAD(&mdev->current_epoch->list);
2677 mdev->epochs = 1;
2678
Lars Ellenberg8432b312011-03-08 16:11:16 +01002679 if (!idr_pre_get(&minors, GFP_KERNEL))
2680 goto out_no_minor_idr;
2681 if (idr_get_new_above(&minors, mdev, minor, &minor_got))
2682 goto out_no_minor_idr;
2683 if (minor_got != minor) {
2684 err = ERR_MINOR_EXISTS;
2685 drbd_msg_put_info("requested minor exists already");
2686 goto out_idr_remove_minor;
Lars Ellenberg569083c2011-03-07 09:49:02 +01002687 }
2688
Lars Ellenberg8432b312011-03-08 16:11:16 +01002689 if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
Lars Ellenberg569083c2011-03-07 09:49:02 +01002690 goto out_idr_remove_minor;
Lars Ellenberg8432b312011-03-08 16:11:16 +01002691 if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got))
2692 goto out_idr_remove_minor;
2693 if (vnr_got != vnr) {
2694 err = ERR_INVALID_REQUEST;
2695 drbd_msg_put_info("requested volume exists already");
2696 goto out_idr_remove_vol;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002697 }
Philipp Reisner774b3052011-02-22 02:07:03 -05002698 add_disk(disk);
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002699	kref_init(&mdev->kref); /* one ref for both idrs and the add_disk */
Philipp Reisner774b3052011-02-22 02:07:03 -05002700
Philipp Reisner2325eb62011-03-15 16:56:18 +01002701 /* inherit the connection state */
2702 mdev->state.conn = tconn->cstate;
2703 if (mdev->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002704 drbd_connected(mdev);
Philipp Reisner2325eb62011-03-15 16:56:18 +01002705
Philipp Reisner774b3052011-02-22 02:07:03 -05002706 return NO_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002707
Lars Ellenberg569083c2011-03-07 09:49:02 +01002708out_idr_remove_vol:
2709 idr_remove(&tconn->volumes, vnr_got);
Lars Ellenberg8432b312011-03-08 16:11:16 +01002710out_idr_remove_minor:
2711 idr_remove(&minors, minor_got);
Lars Ellenberg569083c2011-03-07 09:49:02 +01002712 synchronize_rcu();
Lars Ellenberg8432b312011-03-08 16:11:16 +01002713out_no_minor_idr:
Philipp Reisner81a5d602011-02-22 19:53:16 -05002714 kfree(mdev->current_epoch);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002715out_no_epoch:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002716 drbd_bm_cleanup(mdev);
2717out_no_bitmap:
2718 __free_page(mdev->md_io_page);
2719out_no_io_page:
2720 put_disk(disk);
2721out_no_disk:
2722 blk_cleanup_queue(q);
2723out_no_q:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002724 kfree(mdev);
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002725 kref_put(&tconn->kref, &conn_destroy);
Lars Ellenberg8432b312011-03-08 16:11:16 +01002726 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002727}
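/*
 * The error unwinding above mirrors the setup order: the volume slot is
 * removed from tconn->volumes before the minors idr entry, and a
 * synchronize_rcu() separates idr removal from freeing the structures,
 * since lookups may still be walking the idrs under rcu_read_lock().
 */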
2728
Philipp Reisnerb411b362009-09-25 16:07:19 -07002729int __init drbd_init(void)
2730{
2731 int err;
2732
Philipp Reisner2b8a90b2011-01-10 11:15:17 +01002733 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002734 printk(KERN_ERR
Philipp Reisner81a5d602011-02-22 19:53:16 -05002735 "drbd: invalid minor_count (%d)\n", minor_count);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002736#ifdef MODULE
2737 return -EINVAL;
2738#else
Andreas Gruenbacher46530e82011-05-31 13:08:53 +02002739 minor_count = DRBD_MINOR_COUNT_DEF;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002740#endif
2741 }
2742
Philipp Reisnerb411b362009-09-25 16:07:19 -07002743 err = register_blkdev(DRBD_MAJOR, "drbd");
2744 if (err) {
2745 printk(KERN_ERR
2746 "drbd: unable to register block device major %d\n",
2747 DRBD_MAJOR);
2748 return err;
2749 }
2750
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002751 err = drbd_genl_register();
2752 if (err) {
2753 printk(KERN_ERR "drbd: unable to register generic netlink family\n");
2754 goto fail;
2755 }
2756
2757
Philipp Reisnerb411b362009-09-25 16:07:19 -07002758 register_reboot_notifier(&drbd_notifier);
2759
2760 /*
2761 * allocate all necessary structs
2762 */
2763 err = -ENOMEM;
2764
2765 init_waitqueue_head(&drbd_pp_wait);
2766
2767 drbd_proc = NULL; /* play safe for drbd_cleanup */
Philipp Reisner81a5d602011-02-22 19:53:16 -05002768 idr_init(&minors);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002769
2770 err = drbd_create_mempools();
2771 if (err)
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002772 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002773
Lars Ellenberg8c484ee2010-03-11 16:47:58 +01002774 drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002775 if (!drbd_proc) {
2776 printk(KERN_ERR "drbd: unable to register proc file\n");
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002777 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002778 }
2779
2780 rwlock_init(&global_state_lock);
Philipp Reisner21114382011-01-19 12:26:59 +01002781 INIT_LIST_HEAD(&drbd_tconns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002782
2783 printk(KERN_INFO "drbd: initialized. "
2784 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
2785 API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
2786 printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
2787 printk(KERN_INFO "drbd: registered as block device major %d\n",
2788 DRBD_MAJOR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002789
2790 return 0; /* Success! */
2791
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002792fail:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002793 drbd_cleanup();
2794 if (err == -ENOMEM)
2795 /* currently always the case */
2796 printk(KERN_ERR "drbd: ran out of memory\n");
2797 else
2798 printk(KERN_ERR "drbd: initialization failure\n");
2799 return err;
2800}
2801
2802void drbd_free_bc(struct drbd_backing_dev *ldev)
2803{
2804 if (ldev == NULL)
2805 return;
2806
Tejun Heoe525fd82010-11-13 11:55:17 +01002807 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2808 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002809
2810 kfree(ldev);
2811}
2812
Philipp Reisner360cc742011-02-08 14:29:53 +01002813void drbd_free_sock(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002814{
Philipp Reisner360cc742011-02-08 14:29:53 +01002815 if (tconn->data.socket) {
2816 mutex_lock(&tconn->data.mutex);
2817 kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR);
2818 sock_release(tconn->data.socket);
2819 tconn->data.socket = NULL;
2820 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002821 }
Philipp Reisner360cc742011-02-08 14:29:53 +01002822 if (tconn->meta.socket) {
2823 mutex_lock(&tconn->meta.mutex);
2824 kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR);
2825 sock_release(tconn->meta.socket);
2826 tconn->meta.socket = NULL;
2827 mutex_unlock(&tconn->meta.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002828 }
2829}
2830
Philipp Reisnerb411b362009-09-25 16:07:19 -07002831/* meta data management */
2832
2833struct meta_data_on_disk {
2834 u64 la_size; /* last agreed size. */
2835 u64 uuid[UI_SIZE]; /* UUIDs. */
2836 u64 device_uuid;
2837 u64 reserved_u64_1;
2838 u32 flags; /* MDF */
2839 u32 magic;
2840 u32 md_size_sect;
2841 u32 al_offset; /* offset to this block */
2842 u32 al_nr_extents; /* important for restoring the AL */
Lars Ellenbergf3990022011-03-23 14:31:09 +01002843 /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002844 u32 bm_offset; /* offset to the bitmap, from here */
2845 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
Philipp Reisner99432fc2011-05-20 16:39:13 +02002846 u32 la_peer_max_bio_size; /* last peer max_bio_size */
2847 u32 reserved_u32[3];
Philipp Reisnerb411b362009-09-25 16:07:19 -07002848
2849} __packed;
2850
2851/**
2852 * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
2853 * @mdev: DRBD device.
2854 */
2855void drbd_md_sync(struct drbd_conf *mdev)
2856{
2857 struct meta_data_on_disk *buffer;
2858 sector_t sector;
2859 int i;
2860
Lars Ellenbergee15b032010-09-03 10:00:09 +02002861 del_timer(&mdev->md_sync_timer);
2862 /* timer may be rearmed by drbd_md_mark_dirty() now. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002863 if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
2864 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865
2866	/* We use D_FAILED here and not D_ATTACHING because we try to write
2867 * metadata even if we detach due to a disk failure! */
2868 if (!get_ldev_if_state(mdev, D_FAILED))
2869 return;
2870
Philipp Reisnerb411b362009-09-25 16:07:19 -07002871 mutex_lock(&mdev->md_io_mutex);
2872 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
2873 memset(buffer, 0, 512);
2874
2875 buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
2876 for (i = UI_CURRENT; i < UI_SIZE; i++)
2877 buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
2878 buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
2879 buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
2880
2881 buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
2882 buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
2883 buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
2884 buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
2885 buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
2886
2887 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002888 buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002889
2890 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
2891 sector = mdev->ldev->md.md_offset;
2892
Andreas Gruenbacher3fbf4d22010-12-13 02:25:41 +01002893 if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002894 /* this was a try anyways ... */
2895 dev_err(DEV, "meta data update failed!\n");
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002896 drbd_chk_io_error(mdev, 1, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002897 }
2898
2899 /* Update mdev->ldev->md.la_size_sect,
2900 * since we updated it on metadata. */
2901 mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
2902
2903 mutex_unlock(&mdev->md_io_mutex);
2904 put_ldev(mdev);
2905}
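/*
 * The 512 byte buffer written above is the on-disk layout described by
 * struct meta_data_on_disk; every multi-byte field is converted to big
 * endian (cpu_to_be32/64), so the meta data stays valid when a backing
 * device is moved to a host with different endianness.
 */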
2906
2907/**
2908 * drbd_md_read() - Reads in the meta data super block
2909 * @mdev: DRBD device.
2910 * @bdev: Device from which the meta data should be read in.
2911 *
Andreas Gruenbacher116676c2010-12-08 13:33:11 +01002912 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
Philipp Reisnerb411b362009-09-25 16:07:19 -07002913 * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
2914 */
2915int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2916{
2917 struct meta_data_on_disk *buffer;
2918 int i, rv = NO_ERROR;
2919
2920 if (!get_ldev_if_state(mdev, D_ATTACHING))
2921 return ERR_IO_MD_DISK;
2922
Philipp Reisnerb411b362009-09-25 16:07:19 -07002923 mutex_lock(&mdev->md_io_mutex);
2924 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
2925
Andreas Gruenbacher3fbf4d22010-12-13 02:25:41 +01002926 if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002927 /* NOTE: can't do normal error processing here as this is
Philipp Reisnerb411b362009-09-25 16:07:19 -07002928 called BEFORE disk is attached */
2929 dev_err(DEV, "Error while reading metadata.\n");
2930 rv = ERR_IO_MD_DISK;
2931 goto err;
2932 }
2933
Andreas Gruenbachere7fad8a2011-01-11 13:54:02 +01002934 if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002935 dev_err(DEV, "Error while reading metadata, magic not found.\n");
2936 rv = ERR_MD_INVALID;
2937 goto err;
2938 }
2939 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
2940 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
2941 be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
2942 rv = ERR_MD_INVALID;
2943 goto err;
2944 }
2945 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
2946 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
2947 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
2948 rv = ERR_MD_INVALID;
2949 goto err;
2950 }
2951 if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
2952 dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
2953 be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
2954 rv = ERR_MD_INVALID;
2955 goto err;
2956 }
2957
2958 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
2959 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
2960 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
2961 rv = ERR_MD_INVALID;
2962 goto err;
2963 }
2964
2965 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
2966 for (i = UI_CURRENT; i < UI_SIZE; i++)
2967 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
2968 bdev->md.flags = be32_to_cpu(buffer->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002969 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
2970
Philipp Reisner87eeee42011-01-19 14:16:30 +01002971 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002972 if (mdev->state.conn < C_CONNECTED) {
2973 int peer;
2974 peer = be32_to_cpu(buffer->la_peer_max_bio_size);
2975 peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
2976 mdev->peer_max_bio_size = peer;
2977 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01002978 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002979
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002980	/* This block wants to be removed... */
2981 bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents);
2982 if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
2983 bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002984
2985 err:
2986 mutex_unlock(&mdev->md_io_mutex);
2987 put_ldev(mdev);
2988
2989 return rv;
2990}
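/*
 * Note the fixup above: while not connected, the last known peer
 * max_bio_size is restored from the meta data (clamped to at least
 * DRBD_MAX_BIO_SIZE_SAFE), presumably so requests keep honoring the
 * limit the peer had advertised before the last shutdown.
 */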
2991
2992/**
2993 * drbd_md_mark_dirty() - Mark meta data super block as dirty
2994 * @mdev: DRBD device.
2995 *
2996 * Call this function if you change anything that should be written to
2997 * the meta-data super block. This function sets MD_DIRTY, and starts a
2998 * timer that ensures that within five seconds you have to call drbd_md_sync().
2999 */
Lars Ellenbergca0e6092010-10-14 15:01:21 +02003000#ifdef DEBUG
Lars Ellenbergee15b032010-09-03 10:00:09 +02003001void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
3002{
3003 if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
3004 mod_timer(&mdev->md_sync_timer, jiffies + HZ);
3005 mdev->last_md_mark_dirty.line = line;
3006 mdev->last_md_mark_dirty.func = func;
3007 }
3008}
3009#else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003010void drbd_md_mark_dirty(struct drbd_conf *mdev)
3011{
Lars Ellenbergee15b032010-09-03 10:00:09 +02003012 if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
Lars Ellenbergca0e6092010-10-14 15:01:21 +02003013 mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003014}
Lars Ellenbergee15b032010-09-03 10:00:09 +02003015#endif
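/*
 * Pieced together from this file: drbd_md_mark_dirty() sets MD_DIRTY
 * and arms md_sync_timer (5 seconds, 1 second in the DEBUG variant);
 * when it fires, md_sync_timer_fn() queues md_sync_work at the front of
 * the data work queue, and the worker runs w_md_sync(), which warns and
 * finally calls drbd_md_sync() to write the super block out.
 */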
Philipp Reisnerb411b362009-09-25 16:07:19 -07003016
3017static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
3018{
3019 int i;
3020
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003021 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003022 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003023}
3024
3025void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3026{
3027 if (idx == UI_CURRENT) {
3028 if (mdev->state.role == R_PRIMARY)
3029 val |= 1;
3030 else
3031 val &= ~((u64)1);
3032
3033 drbd_set_ed_uuid(mdev, val);
3034 }
3035
3036 mdev->ldev->md.uuid[idx] = val;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003037 drbd_md_mark_dirty(mdev);
3038}
3039
3040
3041void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3042{
3043 if (mdev->ldev->md.uuid[idx]) {
3044 drbd_uuid_move_history(mdev);
3045 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003046 }
3047 _drbd_uuid_set(mdev, idx, val);
3048}
3049
3050/**
3051 * drbd_uuid_new_current() - Creates a new current UUID
3052 * @mdev: DRBD device.
3053 *
3054 * Creates a new current UUID, and rotates the old current UUID into
3055 * the bitmap slot. Causes an incremental resync upon next connect.
3056 */
3057void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
3058{
3059 u64 val;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003060 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003061
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003062 if (bm_uuid)
3063 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
3064
Philipp Reisnerb411b362009-09-25 16:07:19 -07003065 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003066
3067 get_random_bytes(&val, sizeof(u64));
3068 _drbd_uuid_set(mdev, UI_CURRENT, val);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003069 drbd_print_uuids(mdev, "new current UUID");
Lars Ellenbergaaa8e2b2010-10-15 13:16:53 +02003070 /* get it to stable storage _now_ */
3071 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003072}
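/*
 * In short: the old current UUID is rotated into the bitmap slot, a
 * fresh random value becomes the new current UUID (its lowest bit
 * encodes the Primary role, see _drbd_uuid_set()), and the result is
 * pushed to stable storage right away.
 */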
3073
3074void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
3075{
3076 if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
3077 return;
3078
3079 if (val == 0) {
3080 drbd_uuid_move_history(mdev);
3081 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
3082 mdev->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003083 } else {
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003084 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
3085 if (bm_uuid)
3086 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003087
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003088 mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003089 }
3090 drbd_md_mark_dirty(mdev);
3091}
3092
3093/**
3094 * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
3095 * @mdev: DRBD device.
3096 *
3097 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
3098 */
3099int drbd_bmio_set_n_write(struct drbd_conf *mdev)
3100{
3101 int rv = -EIO;
3102
3103 if (get_ldev_if_state(mdev, D_ATTACHING)) {
3104 drbd_md_set_flag(mdev, MDF_FULL_SYNC);
3105 drbd_md_sync(mdev);
3106 drbd_bm_set_all(mdev);
3107
3108 rv = drbd_bm_write(mdev);
3109
3110 if (!rv) {
3111 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
3112 drbd_md_sync(mdev);
3113 }
3114
3115 put_ldev(mdev);
3116 }
3117
3118 return rv;
3119}
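/*
 * MDF_FULL_SYNC is set and synced to disk before the bitmap is written,
 * and cleared only after the write succeeded, so a crash in between
 * still leaves the "needs full sync" hint on stable storage.
 */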
3120
3121/**
3122 * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
3123 * @mdev: DRBD device.
3124 *
3125 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
3126 */
3127int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
3128{
3129 int rv = -EIO;
3130
Philipp Reisner07782862010-08-31 12:00:50 +02003131 drbd_resume_al(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003132 if (get_ldev_if_state(mdev, D_ATTACHING)) {
3133 drbd_bm_clear_all(mdev);
3134 rv = drbd_bm_write(mdev);
3135 put_ldev(mdev);
3136 }
3137
3138 return rv;
3139}
3140
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003141static int w_bitmap_io(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003142{
3143 struct bm_io_work *work = container_of(w, struct bm_io_work, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01003144 struct drbd_conf *mdev = w->mdev;
Lars Ellenberg02851e92010-12-16 14:47:39 +01003145 int rv = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003146
3147 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
3148
Lars Ellenberg02851e92010-12-16 14:47:39 +01003149 if (get_ldev(mdev)) {
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003150 drbd_bm_lock(mdev, work->why, work->flags);
Lars Ellenberg02851e92010-12-16 14:47:39 +01003151 rv = work->io_fn(mdev);
3152 drbd_bm_unlock(mdev);
3153 put_ldev(mdev);
3154 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003155
Lars Ellenberg4738fa12011-02-21 13:20:55 +01003156 clear_bit_unlock(BITMAP_IO, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003157 wake_up(&mdev->misc_wait);
3158
3159 if (work->done)
3160 work->done(mdev, rv);
3161
3162 clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
3163 work->why = NULL;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003164 work->flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003165
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003166 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003167}
3168
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003169void drbd_ldev_destroy(struct drbd_conf *mdev)
3170{
3171 lc_destroy(mdev->resync);
3172 mdev->resync = NULL;
3173 lc_destroy(mdev->act_log);
3174 mdev->act_log = NULL;
3175 __no_warn(local,
3176 drbd_free_bc(mdev->ldev);
3177 mdev->ldev = NULL;);
3178
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003179 clear_bit(GO_DISKLESS, &mdev->flags);
3180}
3181
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003182static int w_go_diskless(struct drbd_work *w, int unused)
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003183{
Philipp Reisner00d56942011-02-09 18:09:48 +01003184 struct drbd_conf *mdev = w->mdev;
3185
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003186 D_ASSERT(mdev->state.disk == D_FAILED);
Lars Ellenberg9d282872010-10-14 13:57:07 +02003187 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
3188 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003189 * the protected members anymore, though, so once put_ldev reaches zero
3190 * again, it will be safe to free them. */
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003191 drbd_force_state(mdev, NS(disk, D_DISKLESS));
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003192 return 0;
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003193}
3194
3195void drbd_go_diskless(struct drbd_conf *mdev)
3196{
3197 D_ASSERT(mdev->state.disk == D_FAILED);
3198 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
Philipp Reisnere42325a2011-01-19 13:55:45 +01003199 drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003200}
3201
Philipp Reisnerb411b362009-09-25 16:07:19 -07003202/**
3203 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
3204 * @mdev: DRBD device.
3205 * @io_fn: IO callback to be called when bitmap IO is possible
3206 * @done: callback to be called after the bitmap IO was performed
3207 * @why: Descriptive text of the reason for doing the IO
3208 *
3209 * While IO on the bitmap happens we freeze application IO thus we ensure
3210 * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
3211 * called from worker context. It MUST NOT be used while a previous such
3212 * work is still pending!
3213 */
3214void drbd_queue_bitmap_io(struct drbd_conf *mdev,
3215 int (*io_fn)(struct drbd_conf *),
3216 void (*done)(struct drbd_conf *, int),
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003217 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003218{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003219 D_ASSERT(current == mdev->tconn->worker.task);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003220
3221 D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
3222 D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
3223 D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
3224 if (mdev->bm_io_work.why)
3225 dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
3226 why, mdev->bm_io_work.why);
3227
3228 mdev->bm_io_work.io_fn = io_fn;
3229 mdev->bm_io_work.done = done;
3230 mdev->bm_io_work.why = why;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003231 mdev->bm_io_work.flags = flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232
Philipp Reisner87eeee42011-01-19 14:16:30 +01003233 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003234 set_bit(BITMAP_IO, &mdev->flags);
3235 if (atomic_read(&mdev->ap_bio_cnt) == 0) {
Philipp Reisner127b3172010-11-16 10:07:53 +01003236 if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
Philipp Reisnere42325a2011-01-19 13:55:45 +01003237 drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003238 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01003239 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003240}
3241
3242/**
3243 * drbd_bitmap_io() - Does an IO operation on the whole bitmap
3244 * @mdev: DRBD device.
3245 * @io_fn: IO callback to be called when bitmap IO is possible
3246 * @why: Descriptive text of the reason for doing the IO
3247 *
3248 * Freezes application IO while the actual IO operation runs. This
3249 * function MAY NOT be called from worker context.
3250 */
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003251int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
3252 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003253{
3254 int rv;
3255
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003256 D_ASSERT(current != mdev->tconn->worker.task);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003257
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003258 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
3259 drbd_suspend_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003260
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003261 drbd_bm_lock(mdev, why, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262 rv = io_fn(mdev);
3263 drbd_bm_unlock(mdev);
3264
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003265 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
3266 drbd_resume_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003267
3268 return rv;
3269}
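/*
 * Summary of the two entry points: drbd_queue_bitmap_io() (worker
 * context only) queues the job and lets w_bitmap_io() run io_fn under
 * the bitmap lock once application IO has drained, while
 * drbd_bitmap_io() (never from the worker) suspends IO itself, unless
 * BM_LOCKED_SET_ALLOWED is given, runs io_fn synchronously under the
 * bitmap lock, and resumes IO afterwards.
 */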
3270
3271void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
3272{
3273 if ((mdev->ldev->md.flags & flag) != flag) {
3274 drbd_md_mark_dirty(mdev);
3275 mdev->ldev->md.flags |= flag;
3276 }
3277}
3278
3279void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
3280{
3281 if ((mdev->ldev->md.flags & flag) != 0) {
3282 drbd_md_mark_dirty(mdev);
3283 mdev->ldev->md.flags &= ~flag;
3284 }
3285}

3286int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
3287{
3288 return (bdev->md.flags & flag) != 0;
3289}
3290
3291static void md_sync_timer_fn(unsigned long data)
3292{
3293 struct drbd_conf *mdev = (struct drbd_conf *) data;
3294
Philipp Reisnere42325a2011-01-19 13:55:45 +01003295 drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296}
3297
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003298static int w_md_sync(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003299{
Philipp Reisner00d56942011-02-09 18:09:48 +01003300 struct drbd_conf *mdev = w->mdev;
3301
Philipp Reisnerb411b362009-09-25 16:07:19 -07003302 dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
Lars Ellenbergee15b032010-09-03 10:00:09 +02003303#ifdef DEBUG
3304 dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
3305 mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
3306#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07003307 drbd_md_sync(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003308 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309}
3310
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003311const char *cmdname(enum drbd_packet cmd)
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003312{
3313 /* THINK may need to become several global tables
3314 * when we want to support more than
3315 * one PRO_VERSION */
3316 static const char *cmdnames[] = {
3317 [P_DATA] = "Data",
3318 [P_DATA_REPLY] = "DataReply",
3319 [P_RS_DATA_REPLY] = "RSDataReply",
3320 [P_BARRIER] = "Barrier",
3321 [P_BITMAP] = "ReportBitMap",
3322 [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget",
3323 [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource",
3324 [P_UNPLUG_REMOTE] = "UnplugRemote",
3325 [P_DATA_REQUEST] = "DataRequest",
3326 [P_RS_DATA_REQUEST] = "RSDataRequest",
3327 [P_SYNC_PARAM] = "SyncParam",
3328 [P_SYNC_PARAM89] = "SyncParam89",
3329 [P_PROTOCOL] = "ReportProtocol",
3330 [P_UUIDS] = "ReportUUIDs",
3331 [P_SIZES] = "ReportSizes",
3332 [P_STATE] = "ReportState",
3333 [P_SYNC_UUID] = "ReportSyncUUID",
3334 [P_AUTH_CHALLENGE] = "AuthChallenge",
3335 [P_AUTH_RESPONSE] = "AuthResponse",
3336 [P_PING] = "Ping",
3337 [P_PING_ACK] = "PingAck",
3338 [P_RECV_ACK] = "RecvAck",
3339 [P_WRITE_ACK] = "WriteAck",
3340 [P_RS_WRITE_ACK] = "RSWriteAck",
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003341 [P_DISCARD_WRITE] = "DiscardWrite",
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003342 [P_NEG_ACK] = "NegAck",
3343 [P_NEG_DREPLY] = "NegDReply",
3344 [P_NEG_RS_DREPLY] = "NegRSDReply",
3345 [P_BARRIER_ACK] = "BarrierAck",
3346 [P_STATE_CHG_REQ] = "StateChgRequest",
3347 [P_STATE_CHG_REPLY] = "StateChgReply",
3348 [P_OV_REQUEST] = "OVRequest",
3349 [P_OV_REPLY] = "OVReply",
3350 [P_OV_RESULT] = "OVResult",
3351 [P_CSUM_RS_REQUEST] = "CsumRSRequest",
3352 [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
3353 [P_COMPRESSED_BITMAP] = "CBitmap",
3354 [P_DELAY_PROBE] = "DelayProbe",
3355 [P_OUT_OF_SYNC] = "OutOfSync",
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003356 [P_RETRY_WRITE] = "RetryWrite",
Lars Ellenbergae25b332011-04-24 00:01:16 +02003357 [P_RS_CANCEL] = "RSCancel",
3358 [P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
3359 [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
Philipp Reisner036b17e2011-05-16 17:38:11 +02003360		[P_PROTOCOL_UPDATE]	= "protocol_update",
Lars Ellenbergae25b332011-04-24 00:01:16 +02003362
3363 /* enum drbd_packet, but not commands - obsoleted flags:
3364 * P_MAY_IGNORE
3365 * P_MAX_OPT_CMD
3366 */
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003367 };
3368
Lars Ellenbergae25b332011-04-24 00:01:16 +02003369 /* too big for the array: 0xfffX */
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +02003370 if (cmd == P_INITIAL_META)
3371 return "InitialMeta";
3372 if (cmd == P_INITIAL_DATA)
3373 return "InitialData";
Andreas Gruenbacher60381782011-03-28 17:05:50 +02003374 if (cmd == P_CONNECTION_FEATURES)
3375 return "ConnectionFeatures";
Andreas Gruenbacher6e849ce2011-03-14 17:27:45 +01003376 if (cmd >= ARRAY_SIZE(cmdnames))
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003377 return "Unknown";
3378 return cmdnames[cmd];
3379}
3380
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003381/**
3382 * drbd_wait_misc - wait for a request to make progress
3383 * @mdev: device associated with the request
3384 * @i: the struct drbd_interval embedded in struct drbd_request or
3385 * struct drbd_peer_request
3386 */
3387int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i)
3388{
Philipp Reisner44ed1672011-04-19 17:10:19 +02003389 struct net_conf *nc;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003390 DEFINE_WAIT(wait);
3391 long timeout;
3392
Philipp Reisner44ed1672011-04-19 17:10:19 +02003393 rcu_read_lock();
3394 nc = rcu_dereference(mdev->tconn->net_conf);
3395 if (!nc) {
3396 rcu_read_unlock();
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003397 return -ETIMEDOUT;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003398 }
3399 timeout = nc->ko_count ? nc->timeout * HZ / 10 * nc->ko_count : MAX_SCHEDULE_TIMEOUT;
3400 rcu_read_unlock();
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003401
3402 /* Indicate to wake up mdev->misc_wait on progress. */
3403 i->waiting = true;
3404 prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE);
3405 spin_unlock_irq(&mdev->tconn->req_lock);
3406 timeout = schedule_timeout(timeout);
3407 finish_wait(&mdev->misc_wait, &wait);
3408 spin_lock_irq(&mdev->tconn->req_lock);
3409 if (!timeout || mdev->state.conn < C_CONNECTED)
3410 return -ETIMEDOUT;
3411 if (signal_pending(current))
3412 return -ERESTARTSYS;
3413 return 0;
3414}
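/*
 * The timeout used above is ko-count times the network timeout (the
 * latter configured in tenths of a second), or unlimited if ko-count is
 * zero.  The caller holds tconn->req_lock; it is dropped around the
 * schedule_timeout() and re-acquired before returning.
 */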
3415
Philipp Reisnerb411b362009-09-25 16:07:19 -07003416#ifdef CONFIG_DRBD_FAULT_INJECTION
3417/* Fault insertion support including random number generator shamelessly
3418 * stolen from kernel/rcutorture.c */
3419struct fault_random_state {
3420 unsigned long state;
3421 unsigned long count;
3422};
3423
3424#define FAULT_RANDOM_MULT 39916801 /* prime */
3425#define FAULT_RANDOM_ADD 479001701 /* prime */
3426#define FAULT_RANDOM_REFRESH 10000
3427
3428/*
3429 * Crude but fast random-number generator. Uses a linear congruential
3430 * generator, with occasional help from get_random_bytes().
3431 */
3432static unsigned long
3433_drbd_fault_random(struct fault_random_state *rsp)
3434{
3435 long refresh;
3436
Roel Kluin49829ea2009-12-15 22:55:44 +01003437 if (!rsp->count--) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003438 get_random_bytes(&refresh, sizeof(refresh));
3439 rsp->state += refresh;
3440 rsp->count = FAULT_RANDOM_REFRESH;
3441 }
3442 rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
3443 return swahw32(rsp->state);
3444}
3445
3446static char *
3447_drbd_fault_str(unsigned int type) {
3448 static char *_faults[] = {
3449 [DRBD_FAULT_MD_WR] = "Meta-data write",
3450 [DRBD_FAULT_MD_RD] = "Meta-data read",
3451 [DRBD_FAULT_RS_WR] = "Resync write",
3452 [DRBD_FAULT_RS_RD] = "Resync read",
3453 [DRBD_FAULT_DT_WR] = "Data write",
3454 [DRBD_FAULT_DT_RD] = "Data read",
3455 [DRBD_FAULT_DT_RA] = "Data read ahead",
3456 [DRBD_FAULT_BM_ALLOC] = "BM allocation",
Philipp Reisner6b4388a2010-04-26 14:11:45 +02003457 [DRBD_FAULT_AL_EE] = "EE allocation",
3458 [DRBD_FAULT_RECEIVE] = "receive data corruption",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003459 };
3460
3461 return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
3462}
3463
3464unsigned int
3465_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
3466{
3467 static struct fault_random_state rrs = {0, 0};
3468
3469 unsigned int ret = (
3470 (fault_devs == 0 ||
3471 ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
3472 (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
3473
3474 if (ret) {
3475 fault_count++;
3476
Lars Ellenberg73835062010-05-27 11:51:56 +02003477 if (__ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003478 dev_warn(DEV, "***Simulating %s failure\n",
3479 _drbd_fault_str(type));
3480 }
3481
3482 return ret;
3483}
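/*
 * Decoding the check above: a fault is injected when the device's minor
 * is selected in the fault_devs bit mask (fault_devs == 0 meaning "all
 * devices") and a pseudo-random roll of 1..100 falls within fault_rate
 * percent.
 */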
3484#endif
3485
3486const char *drbd_buildtag(void)
3487{
3488	/* DRBD built from external sources has a reference here to the
3489 git hash of the source code. */
3490
3491 static char buildtag[38] = "\0uilt-in";
3492
3493 if (buildtag[0] == 0) {
3494#ifdef CONFIG_MODULES
3495 if (THIS_MODULE != NULL)
3496 sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
3497 else
3498#endif
3499 buildtag[0] = 'b';
3500 }
3501
3502 return buildtag;
3503}
3504
3505module_init(drbd_init)
3506module_exit(drbd_cleanup)
3507
Philipp Reisnerb411b362009-09-25 16:07:19 -07003508EXPORT_SYMBOL(drbd_conn_str);
3509EXPORT_SYMBOL(drbd_role_str);
3510EXPORT_SYMBOL(drbd_disk_str);
3511EXPORT_SYMBOL(drbd_set_st_err_str);