/*
   drbd.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */

#include "drbd_vli.h"

static DEFINE_MUTEX(drbd_main_mutex);
int drbdd_init(struct drbd_thread *);
int drbd_worker(struct drbd_thread *);
int drbd_asender(struct drbd_thread *);

int drbd_init(void);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static int drbd_release(struct gendisk *gd, fmode_t mode);
static int w_md_sync(struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_work *w, int unused);

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* allow_open_on_secondary */
MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
 * this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(proc_details, int, 0644);
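/* Example: when drbd is built into the kernel, the parameters above become
 * boot parameters, e.g. "drbd.minor_count=8" on the kernel command line;
 * built as a module, the usual "modprobe drbd minor_count=8" form applies. */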

#ifdef CONFIG_DRBD_FAULT_INJECTION
int enable_faults;
int fault_rate;
static int fault_count;
int fault_devs;
/* bitmap of enabled faults */
module_param(enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param(fault_rate, int, 0664);
/* count of faults inserted */
module_param(fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param(fault_devs, int, 0644);
#endif

/* module parameter, defined */
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
int disable_sendpage;
int allow_oos;
int proc_details;	/* Detail level in proc drbd */

/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char usermode_helper[80] = "/sbin/drbdadm";

module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
struct idr minors;
struct list_head drbd_tconns;	/* list of struct drbd_tconn */

struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache;	/* peer requests */
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;

/* I do not use a standard mempool, because:
   1) I want to hand out the pre-allocated objects first.
   2) I want to be able to interrupt sleeping allocation with a signal.
   Note: This is a single linked list, the next pointer is the private
	 member of struct page.
 */
struct page *drbd_pp_pool;
spinlock_t drbd_pp_lock;
int drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;

DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);

static const struct block_device_operations drbd_ops = {
	.owner =   THIS_MODULE,
	.open =    drbd_open,
	.release = drbd_release,
};

static void bio_destructor_drbd(struct bio *bio)
{
	bio_free(bio, drbd_md_io_bio_set);
}

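/* Allocate a single-vector bio for DRBD meta-data I/O.  Bios are taken from
 * drbd_md_io_bio_set (a mempool backed bio_set) so that meta-data I/O does
 * not depend on regular bio allocations succeeding; before that bio_set
 * exists (early during module init) we fall back to a plain bio_alloc(). */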
struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
	struct bio *bio;

	if (!drbd_md_io_bio_set)
		return bio_alloc(gfp_mask, 1);

	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
	if (!bio)
		return NULL;
	bio->bi_destructor = bio_destructor_drbd;
	return bio;
}

#ifdef __CHECKER__
/* When checking with sparse, and this is an inline function, sparse will
   give tons of false positives. When this is a real function, sparse works.
 */
int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
{
	int io_allowed;

	atomic_inc(&mdev->local_cnt);
	io_allowed = (mdev->state.disk >= mins);
	if (!io_allowed) {
		if (atomic_dec_and_test(&mdev->local_cnt))
			wake_up(&mdev->misc_wait);
	}
	return io_allowed;
}

#endif

/**
 * DOC: The transfer log
 *
 * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
 * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail
 * of the list. There is always at least one &struct drbd_tl_epoch object.
 *
 * Each &struct drbd_tl_epoch has a circular double linked list of requests
 * attached.
 */
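/*
 * Rough sketch of the list described above (illustrative only):
 *
 *	oldest_tle -> [ epoch ] -> [ epoch ] -> [ epoch ] <- newest_tle
 *	              requests     requests     requests
 *
 * Each epoch collects the write requests issued between two barrier packets;
 * tl_release() retires the oldest epoch once the matching barrier ack comes in.
 */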
static int tl_init(struct drbd_tconn *tconn)
{
	struct drbd_tl_epoch *b;

	/* during device minor initialization, we may well use GFP_KERNEL */
	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
	if (!b)
		return 0;
	INIT_LIST_HEAD(&b->requests);
	INIT_LIST_HEAD(&b->w.list);
	b->next = NULL;
	b->br_number = 4711;
	b->n_writes = 0;
	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */

	tconn->oldest_tle = b;
	tconn->newest_tle = b;
	INIT_LIST_HEAD(&tconn->out_of_sequence_requests);

	return 1;
}

static void tl_cleanup(struct drbd_tconn *tconn)
{
	if (tconn->oldest_tle != tconn->newest_tle)
		conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n");
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n");
	kfree(tconn->oldest_tle);
	tconn->oldest_tle = NULL;
	kfree(tconn->unused_spare_tle);
	tconn->unused_spare_tle = NULL;
}

/**
 * _tl_add_barrier() - Adds a barrier to the transfer log
 * @tconn:	DRBD connection.
 * @new:	Barrier to be added before the current head of the TL.
 *
 * The caller must hold the req_lock.
 */
void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new)
{
	struct drbd_tl_epoch *newest_before;

	INIT_LIST_HEAD(&new->requests);
	INIT_LIST_HEAD(&new->w.list);
	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
	new->next = NULL;
	new->n_writes = 0;

	newest_before = tconn->newest_tle;
	/* never send a barrier number == 0, because that is special-cased
	 * when using TCQ for our write ordering code */
	new->br_number = (newest_before->br_number+1) ?: 1;
	if (tconn->newest_tle != new) {
		tconn->newest_tle->next = new;
		tconn->newest_tle = new;
	}
}
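/* Note on the "?: 1" above: br_number is a free running 32 bit counter, so
 * when 0xffffffff wraps around to 0 the expression yields 1 instead, keeping
 * the special-cased barrier number 0 off the wire.  For example,
 * (0xffffffffU + 1) ?: 1 == 1, while (41 + 1) ?: 1 == 42. */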

/**
 * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
 * @tconn:	DRBD connection.
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
 * &struct drbd_tl_epoch object, this function will cause a termination
 * of the connection.
 */
void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
		unsigned int set_size)
{
	struct drbd_conf *mdev;
	struct drbd_tl_epoch *b, *nob; /* next old barrier */
	struct list_head *le, *tle;
	struct drbd_request *r;

	spin_lock_irq(&tconn->req_lock);

	b = tconn->oldest_tle;

	/* first some paranoia code */
	if (b == NULL) {
		conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
			 barrier_nr);
		goto bail;
	}
	if (b->br_number != barrier_nr) {
		conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
			 barrier_nr, b->br_number);
		goto bail;
	}
	if (b->n_writes != set_size) {
		conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
			 barrier_nr, set_size, b->n_writes);
		goto bail;
	}

	/* Clean up list of requests processed during current epoch */
	list_for_each_safe(le, tle, &b->requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		_req_mod(r, BARRIER_ACKED);
	}
	/* There could be requests on the list waiting for completion
	   of the write to the local disk. To avoid corruptions of
	   slab's data structures we have to remove the lists head.

	   Also there could have been a barrier ack out of sequence, overtaking
	   the write acks - which would be a bug and violating write ordering.
	   To not deadlock in case we lose connection while such requests are
	   still pending, we need some way to find them for the
	   _req_mod(CONNECTION_LOST_WHILE_PENDING).

	   These have been list_move'd to the out_of_sequence_requests list in
	   _req_mod(, BARRIER_ACKED) above.
	   */
	list_del_init(&b->requests);
	mdev = b->w.mdev;

	nob = b->next;
	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		_tl_add_barrier(tconn, b);
		if (nob)
			tconn->oldest_tle = nob;
		/* if nob == NULL b was the only barrier, and becomes the new
		   barrier. Therefore tconn->oldest_tle points already to b */
	} else {
		D_ASSERT(nob != NULL);
		tconn->oldest_tle = nob;
		kfree(b);
	}

	spin_unlock_irq(&tconn->req_lock);
	dec_ap_pending(mdev);

	return;

bail:
	spin_unlock_irq(&tconn->req_lock);
	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}


/**
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @tconn:	DRBD connection.
 * @what:	The action/event to perform with all request objects
 *
 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
 * RESTART_FROZEN_DISK_IO.
 */
void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	struct drbd_tl_epoch *b, *tmp, **pn;
	struct list_head *le, *tle, carry_reads;
	struct drbd_request *req;
	int rv, n_writes, n_reads;

	b = tconn->oldest_tle;
	pn = &tconn->oldest_tle;
	while (b) {
		n_writes = 0;
		n_reads = 0;
		INIT_LIST_HEAD(&carry_reads);
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			rv = _req_mod(req, what);

			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
		}
		tmp = b->next;

		if (n_writes) {
			if (what == RESEND) {
				b->n_writes = n_writes;
				if (b->w.cb == NULL) {
					b->w.cb = w_send_barrier;
					inc_ap_pending(b->w.mdev);
					set_bit(CREATE_BARRIER, &b->w.mdev->flags);
				}

				drbd_queue_work(&tconn->data.work, &b->w);
			}
			pn = &b->next;
		} else {
			if (n_reads)
				list_add(&carry_reads, &b->requests);
			/* there could still be requests on that ring list,
			 * in case local io is still pending */
			list_del(&b->requests);

			/* dec_ap_pending corresponding to queue_barrier.
			 * the newest barrier may not have been queued yet,
			 * in which case w.cb is still NULL. */
			if (b->w.cb != NULL)
				dec_ap_pending(b->w.mdev);

			if (b == tconn->newest_tle) {
				/* recycle, but reinit! */
				if (tmp != NULL)
					conn_err(tconn, "ASSERT FAILED tmp == NULL");
				INIT_LIST_HEAD(&b->requests);
				list_splice(&carry_reads, &b->requests);
				INIT_LIST_HEAD(&b->w.list);
				b->w.cb = NULL;
				b->br_number = net_random();
				b->n_writes = 0;

				*pn = b;
				break;
			}
			*pn = tmp;
			kfree(b);
		}
		b = tmp;
		list_splice(&carry_reads, &b->requests);
	}
}


/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
 * @tconn:	DRBD connection.
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer log gets marked as out of sync. Called from the
 * receiver thread and the worker thread.
 */
void tl_clear(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	struct list_head *le, *tle;
	struct drbd_request *r;
	int vnr;

	spin_lock_irq(&tconn->req_lock);

	_tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);

	/* we expect this list to be empty. */
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n");

	/* but just in case, clean it up anyways! */
	list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		/* It would be nice to complete outside of spinlock.
		 * But this is easier for now. */
		_req_mod(r, CONNECTION_LOST_WHILE_PENDING);
	}

	/* ensure bit indicating barrier is required is clear */
	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		clear_bit(CREATE_BARRIER, &mdev->flags);
	rcu_read_unlock();

	spin_unlock_irq(&tconn->req_lock);
}

void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	spin_lock_irq(&tconn->req_lock);
	_tl_restart(tconn, what);
	spin_unlock_irq(&tconn->req_lock);
}

static int drbd_thread_setup(void *arg)
{
	struct drbd_thread *thi = (struct drbd_thread *) arg;
	struct drbd_tconn *tconn = thi->tconn;
	unsigned long flags;
	int retval;

	snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
		 thi->name[0], thi->tconn->name);

restart:
	retval = thi->function(thi);

	spin_lock_irqsave(&thi->t_lock, flags);

	/* if the receiver has been "EXITING", the last thing it did
	 * was set the conn state to "StandAlone",
	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
	 * and receiver thread will be "started".
	 * drbd_thread_start needs to set "RESTARTING" in that case.
	 * t_state check and assignment needs to be within the same spinlock,
	 * so either thread_start sees EXITING, and can remap to RESTARTING,
	 * or thread_start sees NONE, and can proceed as normal.
	 */

	if (thi->t_state == RESTARTING) {
		conn_info(tconn, "Restarting %s thread\n", thi->name);
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		goto restart;
	}

	thi->task = NULL;
	thi->t_state = NONE;
	smp_mb();
	complete_all(&thi->stop);
	spin_unlock_irqrestore(&thi->t_lock, flags);

	conn_info(tconn, "Terminating %s\n", current->comm);

	/* Release mod reference taken when thread was started */

	kref_put(&tconn->kref, &conn_destroy);
	module_put(THIS_MODULE);
	return retval;
}

static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi,
			     int (*func) (struct drbd_thread *), char *name)
{
	spin_lock_init(&thi->t_lock);
	thi->task    = NULL;
	thi->t_state = NONE;
	thi->function = func;
	thi->tconn = tconn;
	strncpy(thi->name, name, ARRAY_SIZE(thi->name));
}

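/* Summary of the thread life cycle implemented below: a drbd_thread starts
 * out as NONE; drbd_thread_start() switches it to RUNNING and spawns the
 * kthread; _drbd_thread_stop() requests EXITING (or RESTARTING when a
 * restart is wanted), and drbd_thread_start() on an EXITING thread remaps
 * it to RESTARTING.  When thi->function() returns, drbd_thread_setup()
 * either loops (RESTARTING -> RUNNING) or goes back to NONE and completes
 * &thi->stop. */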
int drbd_thread_start(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	struct task_struct *nt;
	unsigned long flags;

	/* is used from state engine doing drbd_thread_stop_nowait,
	 * while holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	switch (thi->t_state) {
	case NONE:
		conn_info(tconn, "Starting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);

		/* Get ref on module for thread - this is released when thread exits */
		if (!try_module_get(THIS_MODULE)) {
			conn_err(tconn, "Failed to get module reference in drbd_thread_start\n");
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return false;
		}

		kref_get(&thi->tconn->kref);

		init_completion(&thi->stop);
		thi->reset_cpu_mask = 1;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */

		nt = kthread_create(drbd_thread_setup, (void *) thi,
				    "drbd_%c_%s", thi->name[0], thi->tconn->name);

		if (IS_ERR(nt)) {
			conn_err(tconn, "Couldn't start thread\n");

			kref_put(&tconn->kref, &conn_destroy);
			module_put(THIS_MODULE);
			return false;
		}
		spin_lock_irqsave(&thi->t_lock, flags);
		thi->task = nt;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		wake_up_process(nt);
		break;
	case EXITING:
		thi->t_state = RESTARTING;
		conn_info(tconn, "Restarting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);
		/* fall through */
	case RUNNING:
	case RESTARTING:
	default:
		spin_unlock_irqrestore(&thi->t_lock, flags);
		break;
	}

	return true;
}


void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
{
	unsigned long flags;

	enum drbd_thread_state ns = restart ? RESTARTING : EXITING;

	/* may be called from state engine, holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	if (thi->t_state == NONE) {
		spin_unlock_irqrestore(&thi->t_lock, flags);
		if (restart)
			drbd_thread_start(thi);
		return;
	}

	if (thi->t_state != ns) {
		if (thi->task == NULL) {
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return;
		}

		thi->t_state = ns;
		smp_mb();
		init_completion(&thi->stop);
		if (thi->task != current)
			force_sig(DRBD_SIGKILL, thi->task);
	}

	spin_unlock_irqrestore(&thi->t_lock, flags);

	if (wait)
		wait_for_completion(&thi->stop);
}

static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi =
		task == tconn->receiver.task ? &tconn->receiver :
		task == tconn->asender.task  ? &tconn->asender :
		task == tconn->worker.task   ? &tconn->worker : NULL;

	return thi;
}

char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi = drbd_task_to_thread(tconn, task);
	return thi ? thi->name : task->comm;
}

int conn_lowest_minor(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	int vnr = 0, m;

	rcu_read_lock();
	mdev = idr_get_next(&tconn->volumes, &vnr);
	m = mdev ? mdev_to_minor(mdev) : -1;
	rcu_read_unlock();

	return m;
}

#ifdef CONFIG_SMP
/**
 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
 * @tconn:	DRBD connection.
 *
 * Forces all threads of a device onto the same CPU. This is beneficial for
 * DRBD's performance. May be overwritten by user's configuration.
 */
void drbd_calc_cpu_mask(struct drbd_tconn *tconn)
{
	int ord, cpu;

	/* user override. */
	if (cpumask_weight(tconn->cpu_mask))
		return;

	ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask);
	for_each_online_cpu(cpu) {
		if (ord-- == 0) {
			cpumask_set_cpu(cpu, tconn->cpu_mask);
			return;
		}
	}
	/* should not be reached */
	cpumask_setall(tconn->cpu_mask);
}
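/* Example of the selection above: with four online CPUs and no mask
 * configured by the user, a connection whose lowest device minor is 5 gets
 * ord = 5 % 4 = 1, i.e. the second online CPU is set in tconn->cpu_mask. */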

/**
 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
 * @thi:	drbd_thread object
 *
 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
 * prematurely.
 */
void drbd_thread_current_set_cpu(struct drbd_thread *thi)
{
	struct task_struct *p = current;

	if (!thi->reset_cpu_mask)
		return;
	thi->reset_cpu_mask = 0;
	set_cpus_allowed_ptr(p, thi->tconn->cpu_mask);
}
#endif

/**
 * drbd_header_size - size of a packet header
 *
 * The header size is a multiple of 8, so any payload following the header is
 * word aligned on 64-bit architectures. (The bitmap send and receive code
 * relies on this.)
 */
unsigned int drbd_header_size(struct drbd_tconn *tconn)
{
	if (tconn->agreed_pro_version >= 100) {
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8));
		return sizeof(struct p_header100);
	} else {
		BUILD_BUG_ON(sizeof(struct p_header80) !=
			     sizeof(struct p_header95));
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
		return sizeof(struct p_header80);
	}
}
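/* Assuming the usual header layouts (p_header80 and p_header95 being 8 bytes,
 * p_header100 being 16 bytes), protocol 100 peers use the larger header that
 * also carries the volume number, while older peers use the 8 byte headers. */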

static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be32(DRBD_MAGIC);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be16(size);
	return sizeof(struct p_header80);
}

static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be32(size);
	return sizeof(struct p_header95);
}

static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd,
				      int size, int vnr)
{
	h->magic = cpu_to_be32(DRBD_MAGIC_100);
	h->volume = cpu_to_be16(vnr);
	h->command = cpu_to_be16(cmd);
	h->length = cpu_to_be32(size);
	h->pad = 0;
	return sizeof(struct p_header100);
}

static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr,
				   void *buffer, enum drbd_packet cmd, int size)
{
	if (tconn->agreed_pro_version >= 100)
		return prepare_header100(buffer, cmd, size, vnr);
	else if (tconn->agreed_pro_version >= 95 &&
		 size > DRBD_MAX_SIZE_H80_PACKET)
		return prepare_header95(buffer, cmd, size);
	else
		return prepare_header80(buffer, cmd, size);
}
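/* Header format selection as implemented above: protocol >= 100 always uses
 * the p_header100 format (which carries the volume number); protocols 95-99
 * switch to p_header95 only for payloads larger than DRBD_MAX_SIZE_H80_PACKET,
 * since its 32 bit length field is needed then; everything else keeps the
 * original p_header80. */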

static void *__conn_prepare_command(struct drbd_tconn *tconn,
				    struct drbd_socket *sock)
{
	if (!sock->socket)
		return NULL;
	return sock->sbuf + drbd_header_size(tconn);
}

void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock)
{
	void *p;

	mutex_lock(&sock->mutex);
	p = __conn_prepare_command(tconn, sock);
	if (!p)
		mutex_unlock(&sock->mutex);

	return p;
}

void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock)
{
	return conn_prepare_command(mdev->tconn, sock);
}
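/* Calling convention of the *_prepare_command()/*_send_command() pairs:
 * prepare takes sock->mutex and returns a pointer to the payload area right
 * behind the packet header in sock->sbuf (or NULL, in which case the mutex
 * has already been dropped again); the matching send call transmits the
 * packet and releases the mutex.  A sketch of the pattern used throughout
 * this file (P_SOME_CMD and some_field are placeholders, not real protocol
 * items):
 *
 *	p = drbd_prepare_command(mdev, sock);
 *	if (!p)
 *		return -EIO;
 *	p->some_field = cpu_to_be32(value);
 *	return drbd_send_command(mdev, sock, P_SOME_CMD, sizeof(*p), NULL, 0);
 */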

static int __send_command(struct drbd_tconn *tconn, int vnr,
			  struct drbd_socket *sock, enum drbd_packet cmd,
			  unsigned int header_size, void *data,
			  unsigned int size)
{
	int msg_flags;
	int err;

	/*
	 * Called with @data == NULL and the size of the data blocks in @size
	 * for commands that send data blocks.  For those commands, omit the
	 * MSG_MORE flag: this will increase the likelihood that data blocks
	 * which are page aligned on the sender will end up page aligned on the
	 * receiver.
	 */
	msg_flags = data ? MSG_MORE : 0;

	header_size += prepare_header(tconn, vnr, sock->sbuf, cmd,
				      header_size + size);
	err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size,
			    msg_flags);
	if (data && !err)
		err = drbd_send_all(tconn, sock->socket, data, size, 0);
	return err;
}

static int __conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
			       enum drbd_packet cmd, unsigned int header_size,
			       void *data, unsigned int size)
{
	return __send_command(tconn, 0, sock, cmd, header_size, data, size);
}

int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __conn_send_command(tconn, sock, cmd, header_size, data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

int drbd_send_command(struct drbd_conf *mdev, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, header_size,
			     data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

int drbd_send_ping(struct drbd_tconn *tconn)
{
	struct drbd_socket *sock;

	sock = &tconn->meta;
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, P_PING, 0, NULL, 0);
}

int drbd_send_ping_ack(struct drbd_tconn *tconn)
{
	struct drbd_socket *sock;

	sock = &tconn->meta;
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, P_PING_ACK, 0, NULL, 0);
}

int drbd_send_sync_param(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_rs_param_95 *p;
	int size;
	const int apv = mdev->tconn->agreed_pro_version;
	enum drbd_packet cmd;
	struct net_conf *nc;
	struct disk_conf *dc;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;

	rcu_read_lock();
	nc = rcu_dereference(mdev->tconn->net_conf);

	size = apv <= 87 ? sizeof(struct p_rs_param)
		: apv == 88 ? sizeof(struct p_rs_param)
			+ strlen(nc->verify_alg) + 1
		: apv <= 94 ? sizeof(struct p_rs_param_89)
		: /* apv >= 95 */ sizeof(struct p_rs_param_95);

	cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (get_ldev(mdev)) {
		dc = rcu_dereference(mdev->ldev->disk_conf);
		p->resync_rate = cpu_to_be32(dc->resync_rate);
		p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
		p->c_delay_target = cpu_to_be32(dc->c_delay_target);
		p->c_fill_target = cpu_to_be32(dc->c_fill_target);
		p->c_max_rate = cpu_to_be32(dc->c_max_rate);
		put_ldev(mdev);
	} else {
		p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF);
		p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
		p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
		p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
		p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
	}

	if (apv >= 88)
		strcpy(p->verify_alg, nc->verify_alg);
	if (apv >= 89)
		strcpy(p->csums_alg, nc->csums_alg);
	rcu_read_unlock();

	return drbd_send_command(mdev, sock, cmd, size, NULL, 0);
}

int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd)
{
	struct drbd_socket *sock;
	struct p_protocol *p;
	struct net_conf *nc;
	int size, cf;

	sock = &tconn->data;
	p = __conn_prepare_command(tconn, sock);
	if (!p)
		return -EIO;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);

	if (nc->dry_run && tconn->agreed_pro_version < 92) {
		rcu_read_unlock();
		mutex_unlock(&sock->mutex);
		conn_err(tconn, "--dry-run is not supported by peer");
		return -EOPNOTSUPP;
	}

	size = sizeof(*p);
	if (tconn->agreed_pro_version >= 87)
		size += strlen(nc->integrity_alg) + 1;

	p->protocol      = cpu_to_be32(nc->wire_protocol);
	p->after_sb_0p   = cpu_to_be32(nc->after_sb_0p);
	p->after_sb_1p   = cpu_to_be32(nc->after_sb_1p);
	p->after_sb_2p   = cpu_to_be32(nc->after_sb_2p);
	p->two_primaries = cpu_to_be32(nc->two_primaries);
	cf = 0;
	if (nc->discard_my_data)
		cf |= CF_DISCARD_MY_DATA;
	if (nc->dry_run)
		cf |= CF_DRY_RUN;
	p->conn_flags    = cpu_to_be32(cf);

	if (tconn->agreed_pro_version >= 87)
		strcpy(p->integrity_alg, nc->integrity_alg);
	rcu_read_unlock();

	return __conn_send_command(tconn, sock, cmd, size, NULL, 0);
}

int drbd_send_protocol(struct drbd_tconn *tconn)
{
	int err;

	mutex_lock(&tconn->data.mutex);
	err = __drbd_send_protocol(tconn, P_PROTOCOL);
	mutex_unlock(&tconn->data.mutex);

	return err;
}

int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
{
	struct drbd_socket *sock;
	struct p_uuids *p;
	int i;

	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
		return 0;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p) {
		put_ldev(mdev);
		return -EIO;
	}
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		p->uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;

	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
	p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
	rcu_read_lock();
	uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0;
	rcu_read_unlock();
	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
	p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);

	put_ldev(mdev);
	return drbd_send_command(mdev, sock, P_UUIDS, sizeof(*p), NULL, 0);
}

int drbd_send_uuids(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 0);
}

int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 8);
}
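/* The uuid_flags bits assembled in _drbd_send_uuids() above: 1 tells the
 * peer that "discard my data" is set in our net config, 2 that this node
 * was a crashed primary, 4 that the local disk is still D_INCONSISTENT,
 * and the 8 passed in here that the initial sync should be skipped. */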

void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
{
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		u64 *uuid = mdev->ldev->md.uuid;
		dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
			 text,
			 (unsigned long long)uuid[UI_CURRENT],
			 (unsigned long long)uuid[UI_BITMAP],
			 (unsigned long long)uuid[UI_HISTORY_START],
			 (unsigned long long)uuid[UI_HISTORY_END]);
		put_ldev(mdev);
	} else {
		dev_info(DEV, "%s effective data uuid: %016llX\n",
			 text,
			 (unsigned long long)mdev->ed_uuid);
	}
}

void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_rs_uuid *p;
	u64 uuid;

	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);

	uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
	drbd_uuid_set(mdev, UI_BITMAP, uuid);
	drbd_print_uuids(mdev, "updated sync UUID");
	drbd_md_sync(mdev);

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (p) {
		p->uuid = cpu_to_be64(uuid);
		drbd_send_command(mdev, sock, P_SYNC_UUID, sizeof(*p), NULL, 0);
	}
}

int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
{
	struct drbd_socket *sock;
	struct p_sizes *p;
	sector_t d_size, u_size;
	int q_order_type, max_bio_size;

	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		D_ASSERT(mdev->ldev->backing_bdev);
		d_size = drbd_get_max_capacity(mdev->ldev);
		rcu_read_lock();
		u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
		rcu_read_unlock();
		q_order_type = drbd_queue_order_type(mdev);
		max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
		put_ldev(mdev);
	} else {
		d_size = 0;
		u_size = 0;
		q_order_type = QUEUE_ORDERED_NONE;
		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
	}

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->d_size = cpu_to_be64(d_size);
	p->u_size = cpu_to_be64(u_size);
	p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
	p->max_bio_size = cpu_to_be32(max_bio_size);
	p->queue_order_type = cpu_to_be16(q_order_type);
	p->dds_flags = cpu_to_be16(flags);
	return drbd_send_command(mdev, sock, P_SIZES, sizeof(*p), NULL, 0);
}

/**
 * drbd_send_state() - Sends the drbd state to the peer
 * @mdev:	DRBD device.
 */
int drbd_send_state(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_state *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
	return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0);
}

int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val)
{
	struct drbd_socket *sock;
	struct p_req_state *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->mask = cpu_to_be32(mask.i);
	p->val = cpu_to_be32(val.i);
	return drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0);
}

int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
{
	enum drbd_packet cmd;
	struct drbd_socket *sock;
	struct p_req_state *p;

	cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ;
	sock = &tconn->data;
	p = conn_prepare_command(tconn, sock);
	if (!p)
		return -EIO;
	p->mask = cpu_to_be32(mask.i);
	p->val = cpu_to_be32(val.i);
	return conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0);
}

void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
{
	struct drbd_socket *sock;
	struct p_req_state_reply *p;

	sock = &mdev->tconn->meta;
	p = drbd_prepare_command(mdev, sock);
	if (p) {
		p->retcode = cpu_to_be32(retcode);
		drbd_send_command(mdev, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0);
	}
}

void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode)
{
	struct drbd_socket *sock;
	struct p_req_state_reply *p;
	enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY;

	sock = &tconn->meta;
	p = conn_prepare_command(tconn, sock);
	if (p) {
		p->retcode = cpu_to_be32(retcode);
		conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0);
	}
}

static void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
{
	BUG_ON(code & ~0xf);
	p->encoding = (p->encoding & ~0xf) | code;
}

static void dcbp_set_start(struct p_compressed_bm *p, int set)
{
	p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
}

static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n)
{
	BUG_ON(n & ~0x7);
	p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
}

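/* Layout of p_compressed_bm->encoding as set by the dcbp_set_*() helpers
 * above: bit 7 records whether the first run length describes set bits,
 * bits 4-6 hold the number of pad bits in the last byte of the VLI stream,
 * and the low nibble holds the enum drbd_bitmap_code value. */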
int fill_bitmap_rle_bits(struct drbd_conf *mdev,
			 struct p_compressed_bm *p,
			 unsigned int size,
			 struct bm_xfer_ctx *c)
{
	struct bitstream bs;
	unsigned long plain_bits;
	unsigned long tmp;
	unsigned long rl;
	unsigned len;
	unsigned toggle;
	int bits, use_rle;

	/* may we use this feature? */
	rcu_read_lock();
	use_rle = rcu_dereference(mdev->tconn->net_conf)->use_rle;
	rcu_read_unlock();
	if (!use_rle || mdev->tconn->agreed_pro_version < 90)
		return 0;

	if (c->bit_offset >= c->bm_bits)
		return 0; /* nothing to do. */

 1202	/* use at most this many bytes */
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001203 bitstream_init(&bs, p->code, size, 0);
1204 memset(p->code, 0, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001205 /* plain bits covered in this code string */
1206 plain_bits = 0;
1207
1208 /* p->encoding & 0x80 stores whether the first run length is set.
1209 * bit offset is implicit.
1210 * start with toggle == 2 to be able to tell the first iteration */
1211 toggle = 2;
1212
 1213	/* see how many plain bits we can stuff into one packet
1214 * using RLE and VLI. */
1215 do {
1216 tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
1217 : _drbd_bm_find_next(mdev, c->bit_offset);
1218 if (tmp == -1UL)
1219 tmp = c->bm_bits;
1220 rl = tmp - c->bit_offset;
1221
1222 if (toggle == 2) { /* first iteration */
1223 if (rl == 0) {
1224 /* the first checked bit was set,
1225 * store start value, */
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01001226 dcbp_set_start(p, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001227 /* but skip encoding of zero run length */
1228 toggle = !toggle;
1229 continue;
1230 }
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01001231 dcbp_set_start(p, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232 }
1233
1234 /* paranoia: catch zero runlength.
1235 * can only happen if bitmap is modified while we scan it. */
1236 if (rl == 0) {
1237 dev_err(DEV, "unexpected zero runlength while encoding bitmap "
1238 "t:%u bo:%lu\n", toggle, c->bit_offset);
1239 return -1;
1240 }
1241
1242 bits = vli_encode_bits(&bs, rl);
1243 if (bits == -ENOBUFS) /* buffer full */
1244 break;
1245 if (bits <= 0) {
1246 dev_err(DEV, "error while encoding bitmap: %d\n", bits);
1247 return 0;
1248 }
1249
1250 toggle = !toggle;
1251 plain_bits += rl;
1252 c->bit_offset = tmp;
1253 } while (c->bit_offset < c->bm_bits);
1254
1255 len = bs.cur.b - p->code + !!bs.cur.bit;
1256
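	/* Illustrative numbers (not from the source): a code string of
	 * len = 100 bytes (800 bits) covering plain_bits = 8000 bitmap bits
	 * passes the check below and is sent compressed; had it covered only
	 * 500 bits, we would rewind and fall back to plain transfer. */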
1257 if (plain_bits < (len << 3)) {
1258 /* incompressible with this method.
1259 * we need to rewind both word and bit position. */
1260 c->bit_offset -= plain_bits;
1261 bm_xfer_ctx_bit_to_word_offset(c);
1262 c->bit_offset = c->word_offset * BITS_PER_LONG;
1263 return 0;
1264 }
1265
1266 /* RLE + VLI was able to compress it just fine.
1267 * update c->word_offset. */
1268 bm_xfer_ctx_bit_to_word_offset(c);
1269
1270 /* store pad_bits */
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01001271 dcbp_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001272
1273 return len;
1274}
1275
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001276/**
1277 * send_bitmap_rle_or_plain
1278 *
1279 * Return 0 when done, 1 when another iteration is needed, and a negative error
1280 * code upon failure.
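 * The bm_xfer_ctx counters packets[0]/bytes[0] account the compressed
 * packets sent, packets[1]/bytes[1] the plain P_BITMAP packets.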
1281 */
1282static int
Andreas Gruenbacher79ed9bd2011-03-24 21:31:38 +01001283send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001284{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001285 struct drbd_socket *sock = &mdev->tconn->data;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001286 unsigned int header_size = drbd_header_size(mdev->tconn);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001287 struct p_compressed_bm *p = sock->sbuf + header_size;
Andreas Gruenbachera982dd52010-12-10 00:45:25 +01001288 int len, err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001289
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001290 len = fill_bitmap_rle_bits(mdev, p,
1291 DRBD_SOCKET_BUFFER_SIZE - header_size - sizeof(*p), c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001292 if (len < 0)
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001293 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001294
1295 if (len) {
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01001296 dcbp_set_code(p, RLE_VLI_Bits);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001297 err = __send_command(mdev->tconn, mdev->vnr, sock,
1298 P_COMPRESSED_BITMAP, sizeof(*p) + len,
1299 NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001300 c->packets[0]++;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001301 c->bytes[0] += header_size + sizeof(*p) + len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001302
1303 if (c->bit_offset >= c->bm_bits)
1304 len = 0; /* DONE */
1305 } else {
1306 /* was not compressible.
1307 * send a buffer full of plain text bits instead. */
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001308 unsigned int data_size;
1309 unsigned long num_words;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001310 unsigned long *p = sock->sbuf + header_size;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001311
1312 data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001313 num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001314 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001315 len = num_words * sizeof(*p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001316 if (len)
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001317 drbd_bm_get_lel(mdev, c->word_offset, num_words, p);
1318 err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP, len, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001319 c->word_offset += num_words;
1320 c->bit_offset = c->word_offset * BITS_PER_LONG;
1321
1322 c->packets[1]++;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02001323 c->bytes[1] += header_size + len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001324
1325 if (c->bit_offset > c->bm_bits)
1326 c->bit_offset = c->bm_bits;
1327 }
Andreas Gruenbachera982dd52010-12-10 00:45:25 +01001328 if (!err) {
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001329 if (len == 0) {
1330 INFO_bm_xfer_stats(mdev, "send", c);
1331 return 0;
1332 } else
1333 return 1;
1334 }
1335 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001336}
1337
1338/* See the comment at receive_bitmap() */
Andreas Gruenbacher058820c2011-03-22 16:03:43 +01001339static int _drbd_send_bitmap(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001340{
1341 struct bm_xfer_ctx c;
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001342 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001343
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001344 if (!expect(mdev->bitmap))
1345 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001346
Philipp Reisnerb411b362009-09-25 16:07:19 -07001347 if (get_ldev(mdev)) {
1348 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1349 dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
1350 drbd_bm_set_all(mdev);
1351 if (drbd_bm_write(mdev)) {
1352 /* write_bm did fail! Leave full sync flag set in Meta P_DATA
1353 * but otherwise process as per normal - need to tell other
1354 * side that a full resync is required! */
1355 dev_err(DEV, "Failed to write bitmap to disk!\n");
1356 } else {
1357 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
1358 drbd_md_sync(mdev);
1359 }
1360 }
1361 put_ldev(mdev);
1362 }
1363
1364 c = (struct bm_xfer_ctx) {
1365 .bm_bits = drbd_bm_bits(mdev),
1366 .bm_words = drbd_bm_words(mdev),
1367 };
1368
1369 do {
Andreas Gruenbacher79ed9bd2011-03-24 21:31:38 +01001370 err = send_bitmap_rle_or_plain(mdev, &c);
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001371 } while (err > 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001372
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001373 return err == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001374}
1375
1376int drbd_send_bitmap(struct drbd_conf *mdev)
1377{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001378 struct drbd_socket *sock = &mdev->tconn->data;
1379 int err = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001380
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001381 mutex_lock(&sock->mutex);
1382 if (sock->socket)
1383 err = !_drbd_send_bitmap(mdev);
1384 mutex_unlock(&sock->mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385 return err;
1386}
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001387
Andreas Gruenbacherd4e67d72011-03-16 01:25:28 +01001388void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001389{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001390 struct drbd_socket *sock;
1391 struct p_barrier_ack *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001392
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001393 if (mdev->state.conn < C_CONNECTED)
1394 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001395
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001396 sock = &mdev->tconn->meta;
1397 p = drbd_prepare_command(mdev, sock);
1398 if (!p)
1399 return;
1400 p->barrier = barrier_nr;
1401 p->set_size = cpu_to_be32(set_size);
1402 drbd_send_command(mdev, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001403}
1404
1405/**
1406 * _drbd_send_ack() - Sends an ack packet
1407 * @mdev: DRBD device.
1408 * @cmd: Packet command code.
1409 * @sector: sector, needs to be in big endian byte order
1410 * @blksize: size in byte, needs to be in big endian byte order
1411 * @block_id: Id, big endian byte order
1412 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001413static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
1414 u64 sector, u32 blksize, u64 block_id)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001415{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001416 struct drbd_socket *sock;
1417 struct p_block_ack *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001418
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001419 if (mdev->state.conn < C_CONNECTED)
Andreas Gruenbachera8c32aa2011-03-16 01:27:22 +01001420 return -EIO;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001421
1422 sock = &mdev->tconn->meta;
1423 p = drbd_prepare_command(mdev, sock);
1424 if (!p)
1425 return -EIO;
1426 p->sector = sector;
1427 p->block_id = block_id;
1428 p->blksize = blksize;
1429 p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq));
1430 return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001431}
1432
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001433/* dp->sector and dp->block_id already/still in network byte order,
1434 * data_size is payload size according to dp->head,
1435 * and may need to be corrected for digest size. */
Andreas Gruenbachera9a99942011-03-16 01:30:14 +01001436void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd,
1437 struct p_data *dp, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001438{
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001439 if (mdev->tconn->peer_integrity_tfm)
1440 data_size -= crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbachera9a99942011-03-16 01:30:14 +01001441 _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
1442 dp->block_id);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001443}
1444
Andreas Gruenbachera9a99942011-03-16 01:30:14 +01001445void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd,
1446 struct p_block_req *rp)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001447{
Andreas Gruenbachera9a99942011-03-16 01:30:14 +01001448 _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001449}
1450
1451/**
1452 * drbd_send_ack() - Sends an ack packet
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001453 * @mdev: DRBD device
1454 * @cmd: packet command code
1455 * @peer_req: peer request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001456 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001457int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001458 struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001459{
Andreas Gruenbacherdd516122011-03-16 15:39:08 +01001460 return _drbd_send_ack(mdev, cmd,
1461 cpu_to_be64(peer_req->i.sector),
1462 cpu_to_be32(peer_req->i.size),
1463 peer_req->block_id);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464}
1465
1466/* This function misuses the block_id field to signal if the blocks
 1467 * are in sync or not. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001468int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001469 sector_t sector, int blksize, u64 block_id)
1470{
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001471 return _drbd_send_ack(mdev, cmd,
1472 cpu_to_be64(sector),
1473 cpu_to_be32(blksize),
1474 cpu_to_be64(block_id));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475}
1476
1477int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
1478 sector_t sector, int size, u64 block_id)
1479{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001480 struct drbd_socket *sock;
1481 struct p_block_req *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001483 sock = &mdev->tconn->data;
1484 p = drbd_prepare_command(mdev, sock);
1485 if (!p)
1486 return -EIO;
1487 p->sector = cpu_to_be64(sector);
1488 p->block_id = block_id;
1489 p->blksize = cpu_to_be32(size);
1490 return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001491}
1492
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001493int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size,
1494 void *digest, int digest_size, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001496 struct drbd_socket *sock;
1497 struct p_block_req *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001498
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001499 /* FIXME: Put the digest into the preallocated socket buffer. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001500
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001501 sock = &mdev->tconn->data;
1502 p = drbd_prepare_command(mdev, sock);
1503 if (!p)
1504 return -EIO;
1505 p->sector = cpu_to_be64(sector);
1506 p->block_id = ID_SYNCER /* unused */;
1507 p->blksize = cpu_to_be32(size);
1508 return drbd_send_command(mdev, sock, cmd, sizeof(*p),
1509 digest, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001510}
1511
1512int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
1513{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001514 struct drbd_socket *sock;
1515 struct p_block_req *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001516
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001517 sock = &mdev->tconn->data;
1518 p = drbd_prepare_command(mdev, sock);
1519 if (!p)
1520 return -EIO;
1521 p->sector = cpu_to_be64(sector);
1522 p->block_id = ID_SYNCER /* unused */;
1523 p->blksize = cpu_to_be32(size);
1524 return drbd_send_command(mdev, sock, P_OV_REQUEST, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001525}
1526
1527/* called on sndtimeo
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001528 * returns false if we should retry,
1529 * true if we think connection is dead
Philipp Reisnerb411b362009-09-25 16:07:19 -07001530 */
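/* For the data socket, each expired send timeout decrements tconn->ko_count
 * (reloaded from the configured ko-count in drbd_send()); the connection is
 * only given up once it reaches zero.  Timeouts on the meta socket, or a
 * missing asender, drop the connection immediately. */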
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001531static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001532{
1533 int drop_it;
1534 /* long elapsed = (long)(jiffies - mdev->last_received); */
1535
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001536 drop_it = tconn->meta.socket == sock
1537 || !tconn->asender.task
1538 || get_t_state(&tconn->asender) != RUNNING
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001539 || tconn->cstate < C_WF_REPORT_PARAMS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001540
1541 if (drop_it)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001542 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001543
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001544 drop_it = !--tconn->ko_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001545 if (!drop_it) {
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001546 conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
1547 current->comm, current->pid, tconn->ko_count);
1548 request_ping(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549 }
1550
1551 return drop_it; /* && (mdev->state == R_PRIMARY) */;
1552}
1553
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001554static void drbd_update_congested(struct drbd_tconn *tconn)
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001555{
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001556 struct sock *sk = tconn->data.socket->sk;
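	/* consider the data socket congested once more than 4/5 (80%) of the
	 * socket send buffer is queued but not yet sent out */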
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001557 if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001558 set_bit(NET_CONGESTED, &tconn->flags);
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001559}
1560
Philipp Reisnerb411b362009-09-25 16:07:19 -07001561/* The idea of sendpage seems to be to put some kind of reference
1562 * to the page into the skb, and to hand it over to the NIC. In
1563 * this process get_page() gets called.
1564 *
1565 * As soon as the page was really sent over the network put_page()
1566 * gets called by some part of the network layer. [ NIC driver? ]
1567 *
1568 * [ get_page() / put_page() increment/decrement the count. If count
1569 * reaches 0 the page will be freed. ]
1570 *
1571 * This works nicely with pages from FSs.
1572 * But this means that in protocol A we might signal IO completion too early!
1573 *
1574 * In order not to corrupt data during a resync we must make sure
 1575 * that we do not reuse our own buffer pages (EEs) too early, therefore
1576 * we have the net_ee list.
1577 *
1578 * XFS seems to have problems, still, it submits pages with page_count == 0!
1579 * As a workaround, we disable sendpage on pages
1580 * with page_count == 0 or PageSlab.
1581 */
1582static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
Andreas Gruenbacherb9874272011-03-16 09:41:10 +01001583 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001584{
Andreas Gruenbacherb9874272011-03-16 09:41:10 +01001585 struct socket *socket;
1586 void *addr;
1587 int err;
1588
1589 socket = mdev->tconn->data.socket;
1590 addr = kmap(page) + offset;
1591 err = drbd_send_all(mdev->tconn, socket, addr, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001592 kunmap(page);
Andreas Gruenbacherb9874272011-03-16 09:41:10 +01001593 if (!err)
1594 mdev->send_cnt += size >> 9;
1595 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596}
1597
1598static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001599 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001600{
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001601 struct socket *socket = mdev->tconn->data.socket;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001602 mm_segment_t oldfs = get_fs();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001603 int len = size;
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001604 int err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605
1606 /* e.g. XFS meta- & log-data is in slab pages, which have a
1607 * page_count of 0 and/or have PageSlab() set.
1608 * we cannot use send_page for those, as that does get_page();
1609 * put_page(); and would cause either a VM_BUG directly, or
1610 * __page_cache_release a page that would actually still be referenced
1611 * by someone, leading to some obscure delayed Oops somewhere else. */
1612 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001613 return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001614
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001615 msg_flags |= MSG_NOSIGNAL;
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001616 drbd_update_congested(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001617 set_fs(KERNEL_DS);
1618 do {
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001619 int sent;
1620
1621 sent = socket->ops->sendpage(socket, page, offset, len, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001622 if (sent <= 0) {
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001623 if (sent == -EAGAIN) {
1624 if (we_should_drop_the_connection(mdev->tconn, socket))
1625 break;
1626 continue;
1627 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628 dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
1629 __func__, (int)size, len, sent);
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001630 if (sent < 0)
1631 err = sent;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001632 break;
1633 }
1634 len -= sent;
1635 offset += sent;
1636 } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
1637 set_fs(oldfs);
Philipp Reisner01a311a2011-02-07 14:30:33 +01001638 clear_bit(NET_CONGESTED, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001639
Andreas Gruenbacher88b390f2011-03-16 10:44:16 +01001640 if (len == 0) {
1641 err = 0;
1642 mdev->send_cnt += size >> 9;
1643 }
1644 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645}
1646
1647static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
1648{
1649 struct bio_vec *bvec;
1650 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001651 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001652 __bio_for_each_segment(bvec, bio, i, 0) {
Andreas Gruenbacher7fae55d2011-03-16 11:46:33 +01001653 int err;
1654
1655 err = _drbd_no_send_page(mdev, bvec->bv_page,
1656 bvec->bv_offset, bvec->bv_len,
1657 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
1658 if (err)
1659 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001660 }
Andreas Gruenbacher7fae55d2011-03-16 11:46:33 +01001661 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662}
1663
1664static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
1665{
1666 struct bio_vec *bvec;
1667 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001668 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001669 __bio_for_each_segment(bvec, bio, i, 0) {
Andreas Gruenbacher7fae55d2011-03-16 11:46:33 +01001670 int err;
1671
1672 err = _drbd_send_page(mdev, bvec->bv_page,
1673 bvec->bv_offset, bvec->bv_len,
1674 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
1675 if (err)
1676 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001677 }
Andreas Gruenbacher7fae55d2011-03-16 11:46:33 +01001678 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001679}
1680
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001681static int _drbd_send_zc_ee(struct drbd_conf *mdev,
1682 struct drbd_peer_request *peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001683{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001684 struct page *page = peer_req->pages;
1685 unsigned len = peer_req->i.size;
Andreas Gruenbacher9f692302011-03-16 10:49:09 +01001686 int err;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001687
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001688 /* hint all but last page with MSG_MORE */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001689 page_chain_for_each(page) {
1690 unsigned l = min_t(unsigned, len, PAGE_SIZE);
Andreas Gruenbacher9f692302011-03-16 10:49:09 +01001691
1692 err = _drbd_send_page(mdev, page, 0, l,
1693 page_chain_next(page) ? MSG_MORE : 0);
1694 if (err)
1695 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001696 len -= l;
1697 }
Andreas Gruenbacher9f692302011-03-16 10:49:09 +01001698 return 0;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001699}
1700
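/* Translate the relevant bio rw flags into DP_* flags for the wire.  Peers
 * speaking a protocol version older than 95 only understand the DP_RW_SYNC
 * hint; FUA, FLUSH and DISCARD are not communicated to them. */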
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001701static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
1702{
Philipp Reisner31890f42011-01-19 14:12:51 +01001703 if (mdev->tconn->agreed_pro_version >= 95)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001704 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001705 (bi_rw & REQ_FUA ? DP_FUA : 0) |
1706 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
1707 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
1708 else
Jens Axboe721a9602011-03-09 11:56:30 +01001709 return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001710}
1711
Philipp Reisnerb411b362009-09-25 16:07:19 -07001712/* Used to send write requests
1713 * R_PRIMARY -> Peer (P_DATA)
1714 */
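/* On the wire this is a struct p_data header, optionally followed by dgs
 * bytes of digest over the bio payload (only if an integrity_tfm is
 * configured and the peer speaks protocol 87 or newer), followed by the
 * req->i.size bytes of data. */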
1715int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
1716{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001717 struct drbd_socket *sock;
1718 struct p_data *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001719 unsigned int dp_flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001720 int dgs;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001721 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001722
Philipp Reisner46e1ce42011-05-16 12:57:15 +02001723 sock = &mdev->tconn->data;
1724 p = drbd_prepare_command(mdev, sock);
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001725 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ?
1726 crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001727
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001728 if (!p)
1729 return -EIO;
1730 p->sector = cpu_to_be64(req->i.sector);
1731 p->block_id = (unsigned long)req;
1732 p->seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq));
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001733 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001734 if (mdev->state.conn >= C_SYNC_SOURCE &&
1735 mdev->state.conn <= C_PAUSED_SYNC_T)
1736 dp_flags |= DP_MAY_SET_IN_SYNC;
Philipp Reisner303d1442011-04-13 16:24:47 -07001737 if (mdev->tconn->agreed_pro_version >= 100) {
1738 if (req->rq_state & RQ_EXP_RECEIVE_ACK)
1739 dp_flags |= DP_SEND_RECEIVE_ACK;
1740 if (req->rq_state & RQ_EXP_WRITE_ACK)
1741 dp_flags |= DP_SEND_WRITE_ACK;
1742 }
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001743 p->dp_flags = cpu_to_be32(dp_flags);
1744 if (dgs)
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001745 drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, p + 1);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001746 err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
Andreas Gruenbacher6bdb9b02011-03-16 11:52:58 +01001747 if (!err) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001748 /* For protocol A, we have to memcpy the payload into
1749 * socket buffers, as we may complete right away
1750 * as soon as we handed it over to tcp, at which point the data
1751 * pages may become invalid.
1752 *
1753 * For data-integrity enabled, we copy it as well, so we can be
1754 * sure that even if the bio pages may still be modified, it
1755 * won't change the data on the wire, thus if the digest checks
1756 * out ok after sending on this side, but does not fit on the
1757 * receiving side, we sure have detected corruption elsewhere.
1758 */
Philipp Reisner303d1442011-04-13 16:24:47 -07001759 if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || dgs)
Andreas Gruenbacher6bdb9b02011-03-16 11:52:58 +01001760 err = _drbd_send_bio(mdev, req->master_bio);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001761 else
Andreas Gruenbacher6bdb9b02011-03-16 11:52:58 +01001762 err = _drbd_send_zc_bio(mdev, req->master_bio);
Lars Ellenberg470be442010-11-10 10:36:52 +01001763
1764 /* double check digest, sometimes buffers have been modified in flight. */
1765 if (dgs > 0 && dgs <= 64) {
Bart Van Assche24c48302011-05-21 18:32:29 +02001766 /* 64 byte, 512 bit, is the largest digest size
Lars Ellenberg470be442010-11-10 10:36:52 +01001767 * currently supported in kernel crypto. */
1768 unsigned char digest[64];
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001769 drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, digest);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001770 if (memcmp(p + 1, digest, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001771 dev_warn(DEV,
1772 "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
Andreas Gruenbacherace652a2011-01-03 17:09:58 +01001773 (unsigned long long)req->i.sector, req->i.size);
Lars Ellenberg470be442010-11-10 10:36:52 +01001774 }
1775 } /* else if (dgs > 64) {
1776 ... Be noisy about digest too large ...
1777 } */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778 }
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001779 mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02001780
Andreas Gruenbacher6bdb9b02011-03-16 11:52:58 +01001781 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001782}
1783
1784/* answer packet, used to send data back for read requests:
1785 * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
1786 * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
1787 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001788int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001789 struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001790{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001791 struct drbd_socket *sock;
1792 struct p_data *p;
Andreas Gruenbacher7b57b89d2011-03-16 11:35:20 +01001793 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001794 int dgs;
1795
Philipp Reisner46e1ce42011-05-16 12:57:15 +02001796 sock = &mdev->tconn->data;
1797 p = drbd_prepare_command(mdev, sock);
1798
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001799 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ?
1800 crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001801
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001802 if (!p)
1803 return -EIO;
1804 p->sector = cpu_to_be64(peer_req->i.sector);
1805 p->block_id = peer_req->block_id;
1806 p->seq_num = 0; /* unused */
1807 if (dgs)
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02001808 drbd_csum_ee(mdev, mdev->tconn->integrity_tfm, peer_req, p + 1);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001809 err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size);
Andreas Gruenbacher7b57b89d2011-03-16 11:35:20 +01001810 if (!err)
1811 err = _drbd_send_zc_ee(mdev, peer_req);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001812 mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02001813
Andreas Gruenbacher7b57b89d2011-03-16 11:35:20 +01001814 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001815}
1816
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001817int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001818{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001819 struct drbd_socket *sock;
1820 struct p_block_desc *p;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001821
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001822 sock = &mdev->tconn->data;
1823 p = drbd_prepare_command(mdev, sock);
1824 if (!p)
1825 return -EIO;
1826 p->sector = cpu_to_be64(req->i.sector);
1827 p->blksize = cpu_to_be32(req->i.size);
1828 return drbd_send_command(mdev, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001829}
1830
Philipp Reisnerb411b362009-09-25 16:07:19 -07001831/*
1832 drbd_send distinguishes two cases:
1833
1834 Packets sent via the data socket "sock"
1835 and packets sent via the meta data socket "msock"
1836
 1837                     sock                      msock
 1838 -----------------+-------------------------+------------------------------
 1839 timeout            conf.timeout / 2          conf.timeout / 2
 1840 timeout action     send a ping via msock     Abort communication
 1841                                              and close all sockets
1842*/
1843
1844/*
1845 * you must have down()ed the appropriate [m]sock_mutex elsewhere!
1846 */
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001847int drbd_send(struct drbd_tconn *tconn, struct socket *sock,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001848 void *buf, size_t size, unsigned msg_flags)
1849{
1850 struct kvec iov;
1851 struct msghdr msg;
1852 int rv, sent = 0;
1853
1854 if (!sock)
Andreas Gruenbacherc0d42c82010-12-09 23:52:22 +01001855 return -EBADR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001856
1857 /* THINK if (signal_pending) return ... ? */
1858
1859 iov.iov_base = buf;
1860 iov.iov_len = size;
1861
1862 msg.msg_name = NULL;
1863 msg.msg_namelen = 0;
1864 msg.msg_control = NULL;
1865 msg.msg_controllen = 0;
1866 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
1867
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001868 if (sock == tconn->data.socket) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02001869 rcu_read_lock();
1870 tconn->ko_count = rcu_dereference(tconn->net_conf)->ko_count;
1871 rcu_read_unlock();
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001872 drbd_update_congested(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001873 }
1874 do {
1875 /* STRANGE
1876 * tcp_sendmsg does _not_ use its size parameter at all ?
1877 *
1878 * -EAGAIN on timeout, -EINTR on signal.
1879 */
1880/* THINK
1881 * do we need to block DRBD_SIG if sock == &meta.socket ??
1882 * otherwise wake_asender() might interrupt some send_*Ack !
1883 */
1884 rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
1885 if (rv == -EAGAIN) {
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001886 if (we_should_drop_the_connection(tconn, sock))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001887 break;
1888 else
1889 continue;
1890 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001891 if (rv == -EINTR) {
1892 flush_signals(current);
1893 rv = 0;
1894 }
1895 if (rv < 0)
1896 break;
1897 sent += rv;
1898 iov.iov_base += rv;
1899 iov.iov_len -= rv;
1900 } while (sent < size);
1901
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001902 if (sock == tconn->data.socket)
1903 clear_bit(NET_CONGESTED, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001904
1905 if (rv <= 0) {
1906 if (rv != -EAGAIN) {
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001907 conn_err(tconn, "%s_sendmsg returned %d\n",
1908 sock == tconn->meta.socket ? "msock" : "sock",
1909 rv);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001910 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001911 } else
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001912 conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001913 }
1914
1915 return sent;
1916}
1917
Andreas Gruenbacherfb708e42010-12-15 17:04:36 +01001918/**
1919 * drbd_send_all - Send an entire buffer
1920 *
1921 * Returns 0 upon success and a negative error value otherwise.
1922 */
1923int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer,
1924 size_t size, unsigned msg_flags)
1925{
1926 int err;
1927
1928 err = drbd_send(tconn, sock, buffer, size, msg_flags);
1929 if (err < 0)
1930 return err;
1931 if (err != size)
1932 return -EIO;
1933 return 0;
1934}
1935
Philipp Reisnerb411b362009-09-25 16:07:19 -07001936static int drbd_open(struct block_device *bdev, fmode_t mode)
1937{
1938 struct drbd_conf *mdev = bdev->bd_disk->private_data;
1939 unsigned long flags;
1940 int rv = 0;
1941
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001942 mutex_lock(&drbd_main_mutex);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001943 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001944 /* to have a stable mdev->state.role
1945 * and no race with updating open_cnt */
1946
1947 if (mdev->state.role != R_PRIMARY) {
1948 if (mode & FMODE_WRITE)
1949 rv = -EROFS;
1950 else if (!allow_oos)
1951 rv = -EMEDIUMTYPE;
1952 }
1953
1954 if (!rv)
1955 mdev->open_cnt++;
Philipp Reisner87eeee42011-01-19 14:16:30 +01001956 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001957 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001958
1959 return rv;
1960}
1961
1962static int drbd_release(struct gendisk *gd, fmode_t mode)
1963{
1964 struct drbd_conf *mdev = gd->private_data;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001965 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001966 mdev->open_cnt--;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001967 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001968 return 0;
1969}
1970
Philipp Reisnerb411b362009-09-25 16:07:19 -07001971static void drbd_set_defaults(struct drbd_conf *mdev)
1972{
Lars Ellenbergf3990022011-03-23 14:31:09 +01001973 /* Beware! The actual layout differs
1974 * between big endian and little endian */
Philipp Reisnerda9fbc22011-03-29 10:52:01 +02001975 mdev->state = (union drbd_dev_state) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001976 { .role = R_SECONDARY,
1977 .peer = R_UNKNOWN,
1978 .conn = C_STANDALONE,
1979 .disk = D_DISKLESS,
1980 .pdsk = D_UNKNOWN,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001981 } };
1982}
1983
1984void drbd_init_set_defaults(struct drbd_conf *mdev)
1985{
1986 /* the memset(,0,) did most of this.
1987 * note: only assignments, no allocation in here */
1988
1989 drbd_set_defaults(mdev);
1990
Philipp Reisnerb411b362009-09-25 16:07:19 -07001991 atomic_set(&mdev->ap_bio_cnt, 0);
1992 atomic_set(&mdev->ap_pending_cnt, 0);
1993 atomic_set(&mdev->rs_pending_cnt, 0);
1994 atomic_set(&mdev->unacked_cnt, 0);
1995 atomic_set(&mdev->local_cnt, 0);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001996 atomic_set(&mdev->pp_in_use_by_net, 0);
Philipp Reisner778f2712010-07-06 11:14:00 +02001997 atomic_set(&mdev->rs_sect_in, 0);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001998 atomic_set(&mdev->rs_sect_ev, 0);
Philipp Reisner759fbdf2010-10-26 16:02:27 +02001999 atomic_set(&mdev->ap_in_flight, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002000
2001 mutex_init(&mdev->md_io_mutex);
Philipp Reisner8410da8f02011-02-11 20:11:10 +01002002 mutex_init(&mdev->own_state_mutex);
2003 mdev->state_mutex = &mdev->own_state_mutex;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002004
Philipp Reisnerb411b362009-09-25 16:07:19 -07002005 spin_lock_init(&mdev->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002006 spin_lock_init(&mdev->peer_seq_lock);
2007 spin_lock_init(&mdev->epoch_lock);
2008
2009 INIT_LIST_HEAD(&mdev->active_ee);
2010 INIT_LIST_HEAD(&mdev->sync_ee);
2011 INIT_LIST_HEAD(&mdev->done_ee);
2012 INIT_LIST_HEAD(&mdev->read_ee);
2013 INIT_LIST_HEAD(&mdev->net_ee);
2014 INIT_LIST_HEAD(&mdev->resync_reads);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002015 INIT_LIST_HEAD(&mdev->resync_work.list);
2016 INIT_LIST_HEAD(&mdev->unplug_work.list);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002017 INIT_LIST_HEAD(&mdev->go_diskless.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002018 INIT_LIST_HEAD(&mdev->md_sync_work.list);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02002019 INIT_LIST_HEAD(&mdev->start_resync_work.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002020 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
Philipp Reisner0ced55a2010-04-30 15:26:20 +02002021
Philipp Reisner794abb72010-12-27 11:51:23 +01002022 mdev->resync_work.cb = w_resync_timer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002023 mdev->unplug_work.cb = w_send_write_hint;
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002024 mdev->go_diskless.cb = w_go_diskless;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002025 mdev->md_sync_work.cb = w_md_sync;
2026 mdev->bm_io_work.w.cb = w_bitmap_io;
Philipp Reisner370a43e2011-01-14 16:03:11 +01002027 mdev->start_resync_work.cb = w_start_resync;
Philipp Reisnera21e9292011-02-08 15:08:49 +01002028
2029 mdev->resync_work.mdev = mdev;
2030 mdev->unplug_work.mdev = mdev;
2031 mdev->go_diskless.mdev = mdev;
2032 mdev->md_sync_work.mdev = mdev;
2033 mdev->bm_io_work.w.mdev = mdev;
2034 mdev->start_resync_work.mdev = mdev;
2035
Philipp Reisnerb411b362009-09-25 16:07:19 -07002036 init_timer(&mdev->resync_timer);
2037 init_timer(&mdev->md_sync_timer);
Philipp Reisner370a43e2011-01-14 16:03:11 +01002038 init_timer(&mdev->start_resync_timer);
Philipp Reisner7fde2be2011-03-01 11:08:28 +01002039 init_timer(&mdev->request_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002040 mdev->resync_timer.function = resync_timer_fn;
2041 mdev->resync_timer.data = (unsigned long) mdev;
2042 mdev->md_sync_timer.function = md_sync_timer_fn;
2043 mdev->md_sync_timer.data = (unsigned long) mdev;
Philipp Reisner370a43e2011-01-14 16:03:11 +01002044 mdev->start_resync_timer.function = start_resync_timer_fn;
2045 mdev->start_resync_timer.data = (unsigned long) mdev;
Philipp Reisner7fde2be2011-03-01 11:08:28 +01002046 mdev->request_timer.function = request_timer_fn;
2047 mdev->request_timer.data = (unsigned long) mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002048
2049 init_waitqueue_head(&mdev->misc_wait);
2050 init_waitqueue_head(&mdev->state_wait);
2051 init_waitqueue_head(&mdev->ee_wait);
2052 init_waitqueue_head(&mdev->al_wait);
2053 init_waitqueue_head(&mdev->seq_wait);
2054
Philipp Reisner2451fc32010-08-24 13:43:11 +02002055 mdev->write_ordering = WO_bdev_flush;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002056 mdev->resync_wenr = LC_FREE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02002057 mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
2058 mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002059}
2060
2061void drbd_mdev_cleanup(struct drbd_conf *mdev)
2062{
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02002063 int i;
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01002064 if (mdev->tconn->receiver.t_state != NONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002065 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01002066 mdev->tconn->receiver.t_state);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002067
2068 /* no need to lock it, I'm the only thread alive */
2069 if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
2070 dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
2071 mdev->al_writ_cnt =
2072 mdev->bm_writ_cnt =
2073 mdev->read_cnt =
2074 mdev->recv_cnt =
2075 mdev->send_cnt =
2076 mdev->writ_cnt =
2077 mdev->p_size =
2078 mdev->rs_start =
2079 mdev->rs_total =
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02002080 mdev->rs_failed = 0;
2081 mdev->rs_last_events = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002082 mdev->rs_last_sect_ev = 0;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02002083 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2084 mdev->rs_mark_left[i] = 0;
2085 mdev->rs_mark_time[i] = 0;
2086 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002087 D_ASSERT(mdev->tconn->net_conf == NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002088
2089 drbd_set_my_capacity(mdev, 0);
2090 if (mdev->bitmap) {
2091 /* maybe never allocated. */
Philipp Reisner02d9a942010-03-24 16:23:03 +01002092 drbd_bm_resize(mdev, 0, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002093 drbd_bm_cleanup(mdev);
2094 }
2095
Philipp Reisner1d041222011-04-22 15:20:23 +02002096 drbd_free_bc(mdev->ldev);
2097 mdev->ldev = NULL;
2098
Philipp Reisner07782862010-08-31 12:00:50 +02002099 clear_bit(AL_SUSPENDED, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002100
Philipp Reisnerb411b362009-09-25 16:07:19 -07002101 D_ASSERT(list_empty(&mdev->active_ee));
2102 D_ASSERT(list_empty(&mdev->sync_ee));
2103 D_ASSERT(list_empty(&mdev->done_ee));
2104 D_ASSERT(list_empty(&mdev->read_ee));
2105 D_ASSERT(list_empty(&mdev->net_ee));
2106 D_ASSERT(list_empty(&mdev->resync_reads));
Philipp Reisnere42325a2011-01-19 13:55:45 +01002107 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
2108 D_ASSERT(list_empty(&mdev->tconn->meta.work.q));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002109 D_ASSERT(list_empty(&mdev->resync_work.list));
2110 D_ASSERT(list_empty(&mdev->unplug_work.list));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002111 D_ASSERT(list_empty(&mdev->go_diskless.list));
Lars Ellenberg2265b472010-12-16 15:41:26 +01002112
2113 drbd_set_defaults(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002114}
2115
2116
2117static void drbd_destroy_mempools(void)
2118{
2119 struct page *page;
2120
2121 while (drbd_pp_pool) {
2122 page = drbd_pp_pool;
2123 drbd_pp_pool = (struct page *)page_private(page);
2124 __free_page(page);
2125 drbd_pp_vacant--;
2126 }
2127
2128 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
2129
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002130 if (drbd_md_io_bio_set)
2131 bioset_free(drbd_md_io_bio_set);
Lars Ellenberg35abf592011-02-23 12:39:46 +01002132 if (drbd_md_io_page_pool)
2133 mempool_destroy(drbd_md_io_page_pool);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002134 if (drbd_ee_mempool)
2135 mempool_destroy(drbd_ee_mempool);
2136 if (drbd_request_mempool)
2137 mempool_destroy(drbd_request_mempool);
2138 if (drbd_ee_cache)
2139 kmem_cache_destroy(drbd_ee_cache);
2140 if (drbd_request_cache)
2141 kmem_cache_destroy(drbd_request_cache);
2142 if (drbd_bm_ext_cache)
2143 kmem_cache_destroy(drbd_bm_ext_cache);
2144 if (drbd_al_ext_cache)
2145 kmem_cache_destroy(drbd_al_ext_cache);
2146
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002147 drbd_md_io_bio_set = NULL;
Lars Ellenberg35abf592011-02-23 12:39:46 +01002148 drbd_md_io_page_pool = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002149 drbd_ee_mempool = NULL;
2150 drbd_request_mempool = NULL;
2151 drbd_ee_cache = NULL;
2152 drbd_request_cache = NULL;
2153 drbd_bm_ext_cache = NULL;
2154 drbd_al_ext_cache = NULL;
2155
2156 return;
2157}
2158
2159static int drbd_create_mempools(void)
2160{
2161 struct page *page;
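	/* enough pool pages for one maximally sized bio per configured minor */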
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002162 const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002163 int i;
2164
2165 /* prepare our caches and mempools */
2166 drbd_request_mempool = NULL;
2167 drbd_ee_cache = NULL;
2168 drbd_request_cache = NULL;
2169 drbd_bm_ext_cache = NULL;
2170 drbd_al_ext_cache = NULL;
2171 drbd_pp_pool = NULL;
Lars Ellenberg35abf592011-02-23 12:39:46 +01002172 drbd_md_io_page_pool = NULL;
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002173 drbd_md_io_bio_set = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002174
2175 /* caches */
2176 drbd_request_cache = kmem_cache_create(
2177 "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
2178 if (drbd_request_cache == NULL)
2179 goto Enomem;
2180
2181 drbd_ee_cache = kmem_cache_create(
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01002182 "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002183 if (drbd_ee_cache == NULL)
2184 goto Enomem;
2185
2186 drbd_bm_ext_cache = kmem_cache_create(
2187 "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
2188 if (drbd_bm_ext_cache == NULL)
2189 goto Enomem;
2190
2191 drbd_al_ext_cache = kmem_cache_create(
2192 "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
2193 if (drbd_al_ext_cache == NULL)
2194 goto Enomem;
2195
2196 /* mempools */
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002197 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
2198 if (drbd_md_io_bio_set == NULL)
2199 goto Enomem;
2200
Lars Ellenberg35abf592011-02-23 12:39:46 +01002201 drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
2202 if (drbd_md_io_page_pool == NULL)
2203 goto Enomem;
2204
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205 drbd_request_mempool = mempool_create(number,
2206 mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
2207 if (drbd_request_mempool == NULL)
2208 goto Enomem;
2209
2210 drbd_ee_mempool = mempool_create(number,
2211 mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
Nicolas Kaiser2027ae12010-10-28 06:15:26 -06002212 if (drbd_ee_mempool == NULL)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002213 goto Enomem;
2214
2215 /* drbd's page pool */
2216 spin_lock_init(&drbd_pp_lock);
2217
2218 for (i = 0; i < number; i++) {
2219 page = alloc_page(GFP_HIGHUSER);
2220 if (!page)
2221 goto Enomem;
2222 set_page_private(page, (unsigned long)drbd_pp_pool);
2223 drbd_pp_pool = page;
2224 }
2225 drbd_pp_vacant = number;
2226
2227 return 0;
2228
2229Enomem:
2230 drbd_destroy_mempools(); /* in case we allocated some */
2231 return -ENOMEM;
2232}
2233
2234static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
2235 void *unused)
2236{
2237 /* just so we have it. you never know what interesting things we
2238 * might want to do here some day...
2239 */
2240
2241 return NOTIFY_DONE;
2242}
2243
2244static struct notifier_block drbd_notifier = {
2245 .notifier_call = drbd_notify_sys,
2246};
2247
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002248static void drbd_release_all_peer_reqs(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002249{
2250 int rr;
2251
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002252 rr = drbd_free_peer_reqs(mdev, &mdev->active_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002253 if (rr)
2254 dev_err(DEV, "%d EEs in active list found!\n", rr);
2255
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002256 rr = drbd_free_peer_reqs(mdev, &mdev->sync_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002257 if (rr)
2258 dev_err(DEV, "%d EEs in sync list found!\n", rr);
2259
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002260 rr = drbd_free_peer_reqs(mdev, &mdev->read_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002261 if (rr)
2262 dev_err(DEV, "%d EEs in read list found!\n", rr);
2263
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002264 rr = drbd_free_peer_reqs(mdev, &mdev->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002265 if (rr)
2266 dev_err(DEV, "%d EEs in done list found!\n", rr);
2267
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002268 rr = drbd_free_peer_reqs(mdev, &mdev->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002269 if (rr)
2270 dev_err(DEV, "%d EEs in net list found!\n", rr);
2271}
2272
Philipp Reisner774b3052011-02-22 02:07:03 -05002273/* caution. no locking. */
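/* kref release callback for a minor device: tears down everything that was
 * set up at device creation and finally drops its reference on the tconn. */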
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002274void drbd_minor_destroy(struct kref *kref)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002275{
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002276 struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref);
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002277 struct drbd_tconn *tconn = mdev->tconn;
2278
Philipp Reisnerb411b362009-09-25 16:07:19 -07002279 /* paranoia asserts */
Andreas Gruenbacher70dc65e2010-12-21 14:46:57 +01002280 D_ASSERT(mdev->open_cnt == 0);
Philipp Reisnere42325a2011-01-19 13:55:45 +01002281 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002282 /* end paranoia asserts */
2283
Philipp Reisnerb411b362009-09-25 16:07:19 -07002284 /* cleanup stuff that may have been allocated during
2285 * device (re-)configuration or state changes */
2286
2287 if (mdev->this_bdev)
2288 bdput(mdev->this_bdev);
2289
Philipp Reisner1d041222011-04-22 15:20:23 +02002290 drbd_free_bc(mdev->ldev);
2291 mdev->ldev = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002292
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02002293 drbd_release_all_peer_reqs(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002294
Philipp Reisnerb411b362009-09-25 16:07:19 -07002295 lc_destroy(mdev->act_log);
2296 lc_destroy(mdev->resync);
2297
2298 kfree(mdev->p_uuid);
2299 /* mdev->p_uuid = NULL; */
2300
Philipp Reisnercd1d9952011-04-11 21:24:24 -07002301 kfree(mdev->current_epoch);
2302 if (mdev->bitmap) /* should no longer be there. */
2303 drbd_bm_cleanup(mdev);
2304 __free_page(mdev->md_io_page);
2305 put_disk(mdev->vdisk);
2306 blk_cleanup_queue(mdev->rq_queue);
Philipp Reisner9958c852011-05-03 16:19:31 +02002307 kfree(mdev->rs_plan_s);
Philipp Reisnercd1d9952011-04-11 21:24:24 -07002308 kfree(mdev);
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002309
2310 kref_put(&tconn->kref, &conn_destroy);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002311}
2312
2313static void drbd_cleanup(void)
2314{
2315 unsigned int i;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002316 struct drbd_conf *mdev;
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002317 struct drbd_tconn *tconn, *tmp;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002318
2319 unregister_reboot_notifier(&drbd_notifier);
2320
Lars Ellenberg17a93f32010-11-24 10:37:35 +01002321 /* first remove proc,
 2322	 * drbdsetup uses its presence to detect
2323 * whether DRBD is loaded.
 2324	 * If we were to get stuck in proc removal,
2325 * but have netlink already deregistered,
2326 * some drbdsetup commands may wait forever
2327 * for an answer.
2328 */
2329 if (drbd_proc)
2330 remove_proc_entry("drbd", NULL);
2331
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002332 drbd_genl_unregister();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002333
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002334 idr_for_each_entry(&minors, mdev, i) {
2335 idr_remove(&minors, mdev_to_minor(mdev));
2336 idr_remove(&mdev->tconn->volumes, mdev->vnr);
2337 del_gendisk(mdev->vdisk);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002338 /* synchronize_rcu(); No other threads running at this point */
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002339 kref_put(&mdev->kref, &drbd_minor_destroy);
2340 }
2341
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002342	/* not _rcu since there is no other updater anymore. Genl already unregistered */
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002343 list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002344		list_del(&tconn->all_tconn); /* not _rcu: no proc, no other threads */
2345 /* synchronize_rcu(); */
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002346 kref_put(&tconn->kref, &conn_destroy);
2347 }
Philipp Reisnerff370e52011-04-11 21:10:11 -07002348
Philipp Reisner81a5d602011-02-22 19:53:16 -05002349 drbd_destroy_mempools();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002350 unregister_blkdev(DRBD_MAJOR, "drbd");
2351
Philipp Reisner81a5d602011-02-22 19:53:16 -05002352 idr_destroy(&minors);
2353
Philipp Reisnerb411b362009-09-25 16:07:19 -07002354 printk(KERN_INFO "drbd: module cleanup done.\n");
2355}
2356
2357/**
2358 * drbd_congested() - Callback for pdflush
2359 * @congested_data: User data
2360 * @bdi_bits: Bits pdflush is currently interested in
2361 *
2362 * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
2363 */
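/* mdev->congestion_reason records a single character describing why:
 * 'd' = IO frozen by DRBD, 'b' = backing device congested, 'n' = network
 * congested, 'a' = backing device and network congested, '-' = neither. */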
2364static int drbd_congested(void *congested_data, int bdi_bits)
2365{
2366 struct drbd_conf *mdev = congested_data;
2367 struct request_queue *q;
2368 char reason = '-';
2369 int r = 0;
2370
Andreas Gruenbacher1b881ef2010-12-13 18:03:38 +01002371 if (!may_inc_ap_bio(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002372 /* DRBD has frozen IO */
2373 r = bdi_bits;
2374 reason = 'd';
2375 goto out;
2376 }
2377
2378 if (get_ldev(mdev)) {
2379 q = bdev_get_queue(mdev->ldev->backing_bdev);
2380 r = bdi_congested(&q->backing_dev_info, bdi_bits);
2381 put_ldev(mdev);
2382 if (r)
2383 reason = 'b';
2384 }
2385
Philipp Reisner01a311a2011-02-07 14:30:33 +01002386 if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002387 r |= (1 << BDI_async_congested);
2388 reason = reason == 'b' ? 'a' : 'n';
2389 }
2390
2391out:
2392 mdev->congestion_reason = reason;
2393 return r;
2394}
2395
Philipp Reisner6699b652011-02-09 11:10:24 +01002396static void drbd_init_workqueue(struct drbd_work_queue* wq)
2397{
2398 sema_init(&wq->s, 0);
2399 spin_lock_init(&wq->q_lock);
2400 INIT_LIST_HEAD(&wq->q);
2401}
2402
Philipp Reisner0ace9df2011-04-24 10:53:19 +02002403struct drbd_tconn *conn_get_by_name(const char *name)
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002404{
2405 struct drbd_tconn *tconn;
2406
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002407 if (!name || !name[0])
2408 return NULL;
2409
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002410 rcu_read_lock();
Philipp Reisnerec0bddb2011-05-04 15:47:01 +02002411 list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
Philipp Reisner0ace9df2011-04-24 10:53:19 +02002412 if (!strcmp(tconn->name, name)) {
2413 kref_get(&tconn->kref);
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002414 goto found;
Philipp Reisner0ace9df2011-04-24 10:53:19 +02002415 }
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002416 }
2417 tconn = NULL;
2418found:
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002419 rcu_read_unlock();
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002420 return tconn;
2421}
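
/*
 * Illustrative sketch (not part of the driver): the kref taken under
 * rcu_read_lock() above is what keeps the tconn valid once the RCU read
 * side is left, so a typical caller looks like this (resource name made up):
 *
 *	struct drbd_tconn *tconn = conn_get_by_name("r0");
 *	if (tconn) {
 *		... use the connection ...
 *		kref_put(&tconn->kref, &conn_destroy);
 *	}
 */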
2422
Andreas Gruenbacher089c0752011-06-14 18:28:09 +02002423struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
2424 void *peer_addr, int peer_addr_len)
2425{
2426 struct drbd_tconn *tconn;
2427
2428 rcu_read_lock();
2429 list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
2430 if (tconn->my_addr_len == my_addr_len &&
2431 tconn->peer_addr_len == peer_addr_len &&
2432 !memcmp(&tconn->my_addr, my_addr, my_addr_len) &&
2433 !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) {
2434 kref_get(&tconn->kref);
2435 goto found;
2436 }
2437 }
2438 tconn = NULL;
2439found:
2440 rcu_read_unlock();
2441 return tconn;
2442}
2443
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002444static int drbd_alloc_socket(struct drbd_socket *socket)
2445{
2446 socket->rbuf = (void *) __get_free_page(GFP_KERNEL);
2447 if (!socket->rbuf)
2448 return -ENOMEM;
Andreas Gruenbacher5a87d922011-03-24 21:17:52 +01002449 socket->sbuf = (void *) __get_free_page(GFP_KERNEL);
2450 if (!socket->sbuf)
2451 return -ENOMEM;
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002452 return 0;
2453}
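
/*
 * Note (added for clarity): if the second allocation above fails, the first
 * page is not freed here; conn_create()'s failure path calls
 * drbd_free_socket() on both sockets, and free_page(0) on a still
 * zero-initialized buffer pointer is a no-op, so nothing leaks.
 */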
2454
2455static void drbd_free_socket(struct drbd_socket *socket)
2456{
Andreas Gruenbacher5a87d922011-03-24 21:17:52 +01002457 free_page((unsigned long) socket->sbuf);
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002458 free_page((unsigned long) socket->rbuf);
2459}
2460
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002461void conn_free_crypto(struct drbd_tconn *tconn)
2462{
Philipp Reisner1d041222011-04-22 15:20:23 +02002463 drbd_free_sock(tconn);
2464
2465 crypto_free_hash(tconn->csums_tfm);
2466 crypto_free_hash(tconn->verify_tfm);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002467 crypto_free_hash(tconn->cram_hmac_tfm);
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02002468 crypto_free_hash(tconn->integrity_tfm);
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02002469 crypto_free_hash(tconn->peer_integrity_tfm);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002470 kfree(tconn->int_dig_in);
2471 kfree(tconn->int_dig_vv);
Philipp Reisner1d041222011-04-22 15:20:23 +02002472
2473 tconn->csums_tfm = NULL;
2474 tconn->verify_tfm = NULL;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002475 tconn->cram_hmac_tfm = NULL;
Andreas Gruenbacher8d412fc2011-04-27 20:59:18 +02002476 tconn->integrity_tfm = NULL;
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02002477 tconn->peer_integrity_tfm = NULL;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02002478 tconn->int_dig_in = NULL;
2479 tconn->int_dig_vv = NULL;
2480}
2481
Andreas Gruenbacherafbbfa82011-06-16 17:58:02 +02002482int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts)
2483{
2484 cpumask_var_t new_cpu_mask;
2485 int err;
2486
2487 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL))
2488 return -ENOMEM;
2489 /*
2490 retcode = ERR_NOMEM;
2491 drbd_msg_put_info("unable to allocate cpumask");
2492 */
2493
2494 /* silently ignore cpu mask on UP kernel */
2495 if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
2496 /* FIXME: Get rid of constant 32 here */
2497 err = __bitmap_parse(res_opts->cpu_mask, 32, 0,
2498 cpumask_bits(new_cpu_mask), nr_cpu_ids);
2499 if (err) {
2500 conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
2501 /* retcode = ERR_CPU_MASK_PARSE; */
2502 goto fail;
2503 }
2504 }
2505 tconn->res_opts = *res_opts;
2506 if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
2507 cpumask_copy(tconn->cpu_mask, new_cpu_mask);
2508 drbd_calc_cpu_mask(tconn);
2509 tconn->receiver.reset_cpu_mask = 1;
2510 tconn->asender.reset_cpu_mask = 1;
2511 tconn->worker.reset_cpu_mask = 1;
2512 }
2513 err = 0;
2514
2515fail:
2516 free_cpumask_var(new_cpu_mask);
2517 return err;
2518
2519}
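
/*
 * Example (illustrative, not from the original source): with an SMP kernel
 * and res_opts->cpu_mask = "3", __bitmap_parse() above yields a mask of
 * CPUs 0 and 1.  If that differs from the current tconn->cpu_mask, the
 * receiver, asender and worker threads get their reset_cpu_mask flag set
 * and re-apply the new affinity the next time they check that flag.
 */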
2520
Philipp Reisnerec0bddb2011-05-04 15:47:01 +02002521/* caller must be under genl_lock() */
Andreas Gruenbacherafbbfa82011-06-16 17:58:02 +02002522struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts)
Philipp Reisner21114382011-01-19 12:26:59 +01002523{
2524 struct drbd_tconn *tconn;
2525
2526 tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL);
2527 if (!tconn)
2528 return NULL;
2529
2530 tconn->name = kstrdup(name, GFP_KERNEL);
2531 if (!tconn->name)
2532 goto fail;
2533
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002534 if (drbd_alloc_socket(&tconn->data))
2535 goto fail;
2536 if (drbd_alloc_socket(&tconn->meta))
2537 goto fail;
2538
Philipp Reisner774b3052011-02-22 02:07:03 -05002539 if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
2540 goto fail;
2541
Andreas Gruenbacherafbbfa82011-06-16 17:58:02 +02002542 if (set_resource_options(tconn, res_opts))
2543 goto fail;
2544
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01002545 if (!tl_init(tconn))
2546 goto fail;
2547
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01002548 tconn->cstate = C_STANDALONE;
Philipp Reisner8410da8f02011-02-11 20:11:10 +01002549 mutex_init(&tconn->cstate_mutex);
Philipp Reisner6699b652011-02-09 11:10:24 +01002550 spin_lock_init(&tconn->req_lock);
Philipp Reisnera0095502011-05-03 13:14:15 +02002551 mutex_init(&tconn->conf_update);
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01002552 init_waitqueue_head(&tconn->ping_wait);
Philipp Reisner062e8792011-02-08 11:09:18 +01002553 idr_init(&tconn->volumes);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01002554
Philipp Reisner6699b652011-02-09 11:10:24 +01002555 drbd_init_workqueue(&tconn->data.work);
2556 mutex_init(&tconn->data.mutex);
2557
2558 drbd_init_workqueue(&tconn->meta.work);
2559 mutex_init(&tconn->meta.mutex);
2560
Philipp Reisner392c8802011-02-09 10:33:31 +01002561 drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver");
2562 drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
2563 drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
2564
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002565 kref_init(&tconn->kref);
Philipp Reisnerec0bddb2011-05-04 15:47:01 +02002566 list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns);
Philipp Reisner21114382011-01-19 12:26:59 +01002567
2568 return tconn;
2569
2570fail:
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01002571 tl_cleanup(tconn);
Philipp Reisner774b3052011-02-22 02:07:03 -05002572 free_cpumask_var(tconn->cpu_mask);
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002573 drbd_free_socket(&tconn->meta);
2574 drbd_free_socket(&tconn->data);
Philipp Reisner21114382011-01-19 12:26:59 +01002575 kfree(tconn->name);
2576 kfree(tconn);
2577
2578 return NULL;
2579}
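
/*
 * Usage sketch (not from the original source): conn_create() is meant to be
 * called under genl_lock(), as the comment above demands.  It returns a
 * tconn that already holds one kref and is already on drbd_tconns; whoever
 * took that reference eventually drops it with
 * kref_put(&tconn->kref, &conn_destroy), as drbd_cleanup() above does.
 */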
2580
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002581void conn_destroy(struct kref *kref)
Philipp Reisner21114382011-01-19 12:26:59 +01002582{
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002583 struct drbd_tconn *tconn = container_of(kref, struct drbd_tconn, kref);
2584
Philipp Reisner062e8792011-02-08 11:09:18 +01002585 idr_destroy(&tconn->volumes);
Philipp Reisner21114382011-01-19 12:26:59 +01002586
Philipp Reisner774b3052011-02-22 02:07:03 -05002587 free_cpumask_var(tconn->cpu_mask);
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002588 drbd_free_socket(&tconn->meta);
2589 drbd_free_socket(&tconn->data);
Philipp Reisner21114382011-01-19 12:26:59 +01002590 kfree(tconn->name);
Philipp Reisnerb42a70a2011-01-27 10:55:20 +01002591 kfree(tconn->int_dig_in);
2592 kfree(tconn->int_dig_vv);
Philipp Reisner21114382011-01-19 12:26:59 +01002593 kfree(tconn);
2594}
2595
Philipp Reisner774b3052011-02-22 02:07:03 -05002596enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002597{
2598 struct drbd_conf *mdev;
2599 struct gendisk *disk;
2600 struct request_queue *q;
Philipp Reisner774b3052011-02-22 02:07:03 -05002601 int vnr_got = vnr;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002602 int minor_got = minor;
Lars Ellenberg8432b312011-03-08 16:11:16 +01002603 enum drbd_ret_code err = ERR_NOMEM;
Philipp Reisner774b3052011-02-22 02:07:03 -05002604
2605 mdev = minor_to_mdev(minor);
2606 if (mdev)
2607 return ERR_MINOR_EXISTS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002608
2609 /* GFP_KERNEL, we are outside of all write-out paths */
2610 mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
2611 if (!mdev)
Philipp Reisner774b3052011-02-22 02:07:03 -05002612 return ERR_NOMEM;
2613
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002614 kref_get(&tconn->kref);
Philipp Reisner774b3052011-02-22 02:07:03 -05002615 mdev->tconn = tconn;
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002616
Philipp Reisnerb411b362009-09-25 16:07:19 -07002617 mdev->minor = minor;
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002618 mdev->vnr = vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002619
2620 drbd_init_set_defaults(mdev);
2621
2622 q = blk_alloc_queue(GFP_KERNEL);
2623 if (!q)
2624 goto out_no_q;
2625 mdev->rq_queue = q;
2626 q->queuedata = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002627
2628 disk = alloc_disk(1);
2629 if (!disk)
2630 goto out_no_disk;
2631 mdev->vdisk = disk;
2632
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002633 set_disk_ro(disk, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002634
2635 disk->queue = q;
2636 disk->major = DRBD_MAJOR;
2637 disk->first_minor = minor;
2638 disk->fops = &drbd_ops;
2639 sprintf(disk->disk_name, "drbd%d", minor);
2640 disk->private_data = mdev;
2641
2642 mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
2643 /* we have no partitions. we contain only ourselves. */
2644 mdev->this_bdev->bd_contains = mdev->this_bdev;
2645
2646 q->backing_dev_info.congested_fn = drbd_congested;
2647 q->backing_dev_info.congested_data = mdev;
2648
Andreas Gruenbacher2f58dcf2010-12-13 17:48:19 +01002649 blk_queue_make_request(q, drbd_make_request);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002650	/* Setting max_hw_sectors to the odd value of 8 KiB here
 2651	   triggers a max_bio_size message upon first attach or connect */
2652 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002653 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
2654 blk_queue_merge_bvec(q, drbd_merge_bvec);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002655 q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002656
2657 mdev->md_io_page = alloc_page(GFP_KERNEL);
2658 if (!mdev->md_io_page)
2659 goto out_no_io_page;
2660
2661 if (drbd_bm_init(mdev))
2662 goto out_no_bitmap;
Andreas Gruenbacherdac13892011-01-21 17:18:39 +01002663 mdev->read_requests = RB_ROOT;
Andreas Gruenbacherde696712011-01-20 15:00:24 +01002664 mdev->write_requests = RB_ROOT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002665
Philipp Reisnerb411b362009-09-25 16:07:19 -07002666 mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
2667 if (!mdev->current_epoch)
2668 goto out_no_epoch;
2669
2670 INIT_LIST_HEAD(&mdev->current_epoch->list);
2671 mdev->epochs = 1;
2672
Lars Ellenberg8432b312011-03-08 16:11:16 +01002673 if (!idr_pre_get(&minors, GFP_KERNEL))
2674 goto out_no_minor_idr;
2675 if (idr_get_new_above(&minors, mdev, minor, &minor_got))
2676 goto out_no_minor_idr;
2677 if (minor_got != minor) {
2678 err = ERR_MINOR_EXISTS;
2679 drbd_msg_put_info("requested minor exists already");
2680 goto out_idr_remove_minor;
Lars Ellenberg569083c2011-03-07 09:49:02 +01002681 }
2682
Lars Ellenberg8432b312011-03-08 16:11:16 +01002683 if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
Lars Ellenberg569083c2011-03-07 09:49:02 +01002684 goto out_idr_remove_minor;
Lars Ellenberg8432b312011-03-08 16:11:16 +01002685 if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got))
2686 goto out_idr_remove_minor;
2687 if (vnr_got != vnr) {
2688 err = ERR_INVALID_REQUEST;
2689 drbd_msg_put_info("requested volume exists already");
2690 goto out_idr_remove_vol;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002691 }
Philipp Reisner774b3052011-02-22 02:07:03 -05002692 add_disk(disk);
Philipp Reisner81fa2e62011-05-04 15:10:30 +02002693	kref_init(&mdev->kref); /* one ref for both idrs and the add_disk */
Philipp Reisner774b3052011-02-22 02:07:03 -05002694
Philipp Reisner2325eb62011-03-15 16:56:18 +01002695 /* inherit the connection state */
2696 mdev->state.conn = tconn->cstate;
2697 if (mdev->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002698 drbd_connected(mdev);
Philipp Reisner2325eb62011-03-15 16:56:18 +01002699
Philipp Reisner774b3052011-02-22 02:07:03 -05002700 return NO_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002701
Lars Ellenberg569083c2011-03-07 09:49:02 +01002702out_idr_remove_vol:
2703 idr_remove(&tconn->volumes, vnr_got);
Lars Ellenberg8432b312011-03-08 16:11:16 +01002704out_idr_remove_minor:
2705 idr_remove(&minors, minor_got);
Lars Ellenberg569083c2011-03-07 09:49:02 +01002706 synchronize_rcu();
Lars Ellenberg8432b312011-03-08 16:11:16 +01002707out_no_minor_idr:
Philipp Reisner81a5d602011-02-22 19:53:16 -05002708 kfree(mdev->current_epoch);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002709out_no_epoch:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002710 drbd_bm_cleanup(mdev);
2711out_no_bitmap:
2712 __free_page(mdev->md_io_page);
2713out_no_io_page:
2714 put_disk(disk);
2715out_no_disk:
2716 blk_cleanup_queue(q);
2717out_no_q:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002718 kfree(mdev);
Philipp Reisner9dc9fbb2011-04-22 15:23:32 +02002719 kref_put(&tconn->kref, &conn_destroy);
Lars Ellenberg8432b312011-03-08 16:11:16 +01002720 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002721}
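
/*
 * Note (added for clarity, not from the original source): the error
 * unwinding above mirrors the setup order.  idr slots that were already
 * claimed are released again with idr_remove(), followed by a
 * synchronize_rcu() so that readers which may already have looked the mdev
 * up are done before the half-constructed object is freed; the tconn
 * reference taken with kref_get() at the top is dropped last, with
 * kref_put(&tconn->kref, &conn_destroy).
 */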
2722
Philipp Reisnerb411b362009-09-25 16:07:19 -07002723int __init drbd_init(void)
2724{
2725 int err;
2726
Philipp Reisner2b8a90b2011-01-10 11:15:17 +01002727 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002728 printk(KERN_ERR
Philipp Reisner81a5d602011-02-22 19:53:16 -05002729 "drbd: invalid minor_count (%d)\n", minor_count);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002730#ifdef MODULE
2731 return -EINVAL;
2732#else
Andreas Gruenbacher46530e82011-05-31 13:08:53 +02002733 minor_count = DRBD_MINOR_COUNT_DEF;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002734#endif
2735 }
2736
Philipp Reisnerb411b362009-09-25 16:07:19 -07002737 err = register_blkdev(DRBD_MAJOR, "drbd");
2738 if (err) {
2739 printk(KERN_ERR
2740 "drbd: unable to register block device major %d\n",
2741 DRBD_MAJOR);
2742 return err;
2743 }
2744
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002745 err = drbd_genl_register();
2746 if (err) {
2747 printk(KERN_ERR "drbd: unable to register generic netlink family\n");
2748 goto fail;
2749 }
2750
2751
Philipp Reisnerb411b362009-09-25 16:07:19 -07002752 register_reboot_notifier(&drbd_notifier);
2753
2754 /*
2755 * allocate all necessary structs
2756 */
2757 err = -ENOMEM;
2758
2759 init_waitqueue_head(&drbd_pp_wait);
2760
2761 drbd_proc = NULL; /* play safe for drbd_cleanup */
Philipp Reisner81a5d602011-02-22 19:53:16 -05002762 idr_init(&minors);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002763
2764 err = drbd_create_mempools();
2765 if (err)
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002766 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002767
Lars Ellenberg8c484ee2010-03-11 16:47:58 +01002768 drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002769 if (!drbd_proc) {
2770 printk(KERN_ERR "drbd: unable to register proc file\n");
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002771 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002772 }
2773
2774 rwlock_init(&global_state_lock);
Philipp Reisner21114382011-01-19 12:26:59 +01002775 INIT_LIST_HEAD(&drbd_tconns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776
2777 printk(KERN_INFO "drbd: initialized. "
2778 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
2779 API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
2780 printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
2781 printk(KERN_INFO "drbd: registered as block device major %d\n",
2782 DRBD_MAJOR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002783
2784 return 0; /* Success! */
2785
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002786fail:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002787 drbd_cleanup();
2788 if (err == -ENOMEM)
2789 /* currently always the case */
2790 printk(KERN_ERR "drbd: ran out of memory\n");
2791 else
2792 printk(KERN_ERR "drbd: initialization failure\n");
2793 return err;
2794}
2795
2796void drbd_free_bc(struct drbd_backing_dev *ldev)
2797{
2798 if (ldev == NULL)
2799 return;
2800
Tejun Heoe525fd82010-11-13 11:55:17 +01002801 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2802 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002803
2804 kfree(ldev);
2805}
2806
Philipp Reisner360cc742011-02-08 14:29:53 +01002807void drbd_free_sock(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002808{
Philipp Reisner360cc742011-02-08 14:29:53 +01002809 if (tconn->data.socket) {
2810 mutex_lock(&tconn->data.mutex);
2811 kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR);
2812 sock_release(tconn->data.socket);
2813 tconn->data.socket = NULL;
2814 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002815 }
Philipp Reisner360cc742011-02-08 14:29:53 +01002816 if (tconn->meta.socket) {
2817 mutex_lock(&tconn->meta.mutex);
2818 kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR);
2819 sock_release(tconn->meta.socket);
2820 tconn->meta.socket = NULL;
2821 mutex_unlock(&tconn->meta.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002822 }
2823}
2824
Philipp Reisnerb411b362009-09-25 16:07:19 -07002825/* meta data management */
2826
2827struct meta_data_on_disk {
2828 u64 la_size; /* last agreed size. */
2829 u64 uuid[UI_SIZE]; /* UUIDs. */
2830 u64 device_uuid;
2831 u64 reserved_u64_1;
2832 u32 flags; /* MDF */
2833 u32 magic;
2834 u32 md_size_sect;
2835 u32 al_offset; /* offset to this block */
2836 u32 al_nr_extents; /* important for restoring the AL */
Lars Ellenbergf3990022011-03-23 14:31:09 +01002837 /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002838 u32 bm_offset; /* offset to the bitmap, from here */
2839 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
Philipp Reisner99432fc2011-05-20 16:39:13 +02002840 u32 la_peer_max_bio_size; /* last peer max_bio_size */
2841 u32 reserved_u32[3];
Philipp Reisnerb411b362009-09-25 16:07:19 -07002842
2843} __packed;
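
/*
 * Note (added for clarity): this is the on-disk layout of the DRBD meta
 * data super block.  drbd_md_sync() below zeroes a full 512 byte sector and
 * fills in these fields, all multi-byte members stored big-endian via
 * cpu_to_be32()/cpu_to_be64(), which is why drbd_md_read() verifies
 * DRBD_MD_MAGIC and the recorded offsets before trusting the contents.
 */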
2844
2845/**
2846 * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
2847 * @mdev: DRBD device.
2848 */
2849void drbd_md_sync(struct drbd_conf *mdev)
2850{
2851 struct meta_data_on_disk *buffer;
2852 sector_t sector;
2853 int i;
2854
Lars Ellenbergee15b032010-09-03 10:00:09 +02002855 del_timer(&mdev->md_sync_timer);
2856 /* timer may be rearmed by drbd_md_mark_dirty() now. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002857 if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
2858 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002859
 2860	/* We use D_FAILED here and not D_ATTACHING because we try to write
2861 * metadata even if we detach due to a disk failure! */
2862 if (!get_ldev_if_state(mdev, D_FAILED))
2863 return;
2864
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865 mutex_lock(&mdev->md_io_mutex);
2866 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
2867 memset(buffer, 0, 512);
2868
2869 buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
2870 for (i = UI_CURRENT; i < UI_SIZE; i++)
2871 buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
2872 buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
2873 buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
2874
2875 buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
2876 buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
2877 buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
2878 buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
2879 buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
2880
2881 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002882 buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002883
2884 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
2885 sector = mdev->ldev->md.md_offset;
2886
Andreas Gruenbacher3fbf4d22010-12-13 02:25:41 +01002887 if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002888 /* this was a try anyways ... */
2889 dev_err(DEV, "meta data update failed!\n");
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002890 drbd_chk_io_error(mdev, 1, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002891 }
2892
2893 /* Update mdev->ldev->md.la_size_sect,
2894 * since we updated it on metadata. */
2895 mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
2896
2897 mutex_unlock(&mdev->md_io_mutex);
2898 put_ldev(mdev);
2899}
2900
2901/**
2902 * drbd_md_read() - Reads in the meta data super block
2903 * @mdev: DRBD device.
2904 * @bdev: Device from which the meta data should be read in.
2905 *
Andreas Gruenbacher116676c2010-12-08 13:33:11 +01002906 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
Philipp Reisnerb411b362009-09-25 16:07:19 -07002907 * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
2908 */
2909int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2910{
2911 struct meta_data_on_disk *buffer;
2912 int i, rv = NO_ERROR;
2913
2914 if (!get_ldev_if_state(mdev, D_ATTACHING))
2915 return ERR_IO_MD_DISK;
2916
Philipp Reisnerb411b362009-09-25 16:07:19 -07002917 mutex_lock(&mdev->md_io_mutex);
2918 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
2919
Andreas Gruenbacher3fbf4d22010-12-13 02:25:41 +01002920 if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002921 /* NOTE: can't do normal error processing here as this is
Philipp Reisnerb411b362009-09-25 16:07:19 -07002922 called BEFORE disk is attached */
2923 dev_err(DEV, "Error while reading metadata.\n");
2924 rv = ERR_IO_MD_DISK;
2925 goto err;
2926 }
2927
Andreas Gruenbachere7fad8a2011-01-11 13:54:02 +01002928 if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002929 dev_err(DEV, "Error while reading metadata, magic not found.\n");
2930 rv = ERR_MD_INVALID;
2931 goto err;
2932 }
2933 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
2934 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
2935 be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
2936 rv = ERR_MD_INVALID;
2937 goto err;
2938 }
2939 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
2940 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
2941 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
2942 rv = ERR_MD_INVALID;
2943 goto err;
2944 }
2945 if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
2946 dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
2947 be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
2948 rv = ERR_MD_INVALID;
2949 goto err;
2950 }
2951
2952 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
2953 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
2954 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
2955 rv = ERR_MD_INVALID;
2956 goto err;
2957 }
2958
2959 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
2960 for (i = UI_CURRENT; i < UI_SIZE; i++)
2961 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
2962 bdev->md.flags = be32_to_cpu(buffer->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002963 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
2964
Philipp Reisner87eeee42011-01-19 14:16:30 +01002965 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002966 if (mdev->state.conn < C_CONNECTED) {
2967 int peer;
2968 peer = be32_to_cpu(buffer->la_peer_max_bio_size);
2969 peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
2970 mdev->peer_max_bio_size = peer;
2971 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01002972 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002973
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002974	/* This block wants to be removed... */
2975 bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents);
2976 if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
2977 bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002978
2979 err:
2980 mutex_unlock(&mdev->md_io_mutex);
2981 put_ldev(mdev);
2982
2983 return rv;
2984}
2985
2986/**
2987 * drbd_md_mark_dirty() - Mark meta data super block as dirty
2988 * @mdev: DRBD device.
2989 *
2990 * Call this function if you change anything that should be written to
2991 * the meta-data super block. This function sets MD_DIRTY, and starts a
2992 * timer that ensures that within five seconds you have to call drbd_md_sync().
2993 */
Lars Ellenbergca0e6092010-10-14 15:01:21 +02002994#ifdef DEBUG
Lars Ellenbergee15b032010-09-03 10:00:09 +02002995void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
2996{
2997 if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
2998 mod_timer(&mdev->md_sync_timer, jiffies + HZ);
2999 mdev->last_md_mark_dirty.line = line;
3000 mdev->last_md_mark_dirty.func = func;
3001 }
3002}
3003#else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003004void drbd_md_mark_dirty(struct drbd_conf *mdev)
3005{
Lars Ellenbergee15b032010-09-03 10:00:09 +02003006 if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
Lars Ellenbergca0e6092010-10-14 15:01:21 +02003007 mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008}
Lars Ellenbergee15b032010-09-03 10:00:09 +02003009#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07003010
3011static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
3012{
3013 int i;
3014
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003015 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003016 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003017}
3018
3019void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3020{
3021 if (idx == UI_CURRENT) {
3022 if (mdev->state.role == R_PRIMARY)
3023 val |= 1;
3024 else
3025 val &= ~((u64)1);
3026
3027 drbd_set_ed_uuid(mdev, val);
3028 }
3029
3030 mdev->ldev->md.uuid[idx] = val;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003031 drbd_md_mark_dirty(mdev);
3032}
3033
3034
3035void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3036{
3037 if (mdev->ldev->md.uuid[idx]) {
3038 drbd_uuid_move_history(mdev);
3039 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003040 }
3041 _drbd_uuid_set(mdev, idx, val);
3042}
3043
3044/**
3045 * drbd_uuid_new_current() - Creates a new current UUID
3046 * @mdev: DRBD device.
3047 *
3048 * Creates a new current UUID, and rotates the old current UUID into
3049 * the bitmap slot. Causes an incremental resync upon next connect.
3050 */
3051void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
3052{
3053 u64 val;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003054 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003055
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003056 if (bm_uuid)
3057 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
3058
Philipp Reisnerb411b362009-09-25 16:07:19 -07003059 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003060
3061 get_random_bytes(&val, sizeof(u64));
3062 _drbd_uuid_set(mdev, UI_CURRENT, val);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003063 drbd_print_uuids(mdev, "new current UUID");
Lars Ellenbergaaa8e2b2010-10-15 13:16:53 +02003064 /* get it to stable storage _now_ */
3065 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003066}
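
/*
 * Illustrative example (not from the original source): if the UUID set was
 *	CURRENT = A, BITMAP = 0
 * before the rotation, drbd_uuid_new_current() leaves
 *	CURRENT = <new random value>, BITMAP = A
 * behind.  On the next connect the peer still reports A as its current
 * UUID, which matches our bitmap UUID, so only the blocks marked in the
 * bitmap since the rotation need to be resynced.
 */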
3067
3068void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
3069{
3070 if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
3071 return;
3072
3073 if (val == 0) {
3074 drbd_uuid_move_history(mdev);
3075 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
3076 mdev->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003077 } else {
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003078 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
3079 if (bm_uuid)
3080 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003081
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003082 mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003083 }
3084 drbd_md_mark_dirty(mdev);
3085}
3086
3087/**
3088 * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
3089 * @mdev: DRBD device.
3090 *
3091 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
3092 */
3093int drbd_bmio_set_n_write(struct drbd_conf *mdev)
3094{
3095 int rv = -EIO;
3096
3097 if (get_ldev_if_state(mdev, D_ATTACHING)) {
3098 drbd_md_set_flag(mdev, MDF_FULL_SYNC);
3099 drbd_md_sync(mdev);
3100 drbd_bm_set_all(mdev);
3101
3102 rv = drbd_bm_write(mdev);
3103
3104 if (!rv) {
3105 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
3106 drbd_md_sync(mdev);
3107 }
3108
3109 put_ldev(mdev);
3110 }
3111
3112 return rv;
3113}
3114
3115/**
3116 * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
3117 * @mdev: DRBD device.
3118 *
3119 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
3120 */
3121int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
3122{
3123 int rv = -EIO;
3124
Philipp Reisner07782862010-08-31 12:00:50 +02003125 drbd_resume_al(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126 if (get_ldev_if_state(mdev, D_ATTACHING)) {
3127 drbd_bm_clear_all(mdev);
3128 rv = drbd_bm_write(mdev);
3129 put_ldev(mdev);
3130 }
3131
3132 return rv;
3133}
3134
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003135static int w_bitmap_io(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003136{
3137 struct bm_io_work *work = container_of(w, struct bm_io_work, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01003138 struct drbd_conf *mdev = w->mdev;
Lars Ellenberg02851e92010-12-16 14:47:39 +01003139 int rv = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003140
3141 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
3142
Lars Ellenberg02851e92010-12-16 14:47:39 +01003143 if (get_ldev(mdev)) {
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003144 drbd_bm_lock(mdev, work->why, work->flags);
Lars Ellenberg02851e92010-12-16 14:47:39 +01003145 rv = work->io_fn(mdev);
3146 drbd_bm_unlock(mdev);
3147 put_ldev(mdev);
3148 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003149
Lars Ellenberg4738fa12011-02-21 13:20:55 +01003150 clear_bit_unlock(BITMAP_IO, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003151 wake_up(&mdev->misc_wait);
3152
3153 if (work->done)
3154 work->done(mdev, rv);
3155
3156 clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
3157 work->why = NULL;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003158 work->flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003159
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003160 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003161}
3162
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003163void drbd_ldev_destroy(struct drbd_conf *mdev)
3164{
3165 lc_destroy(mdev->resync);
3166 mdev->resync = NULL;
3167 lc_destroy(mdev->act_log);
3168 mdev->act_log = NULL;
3169 __no_warn(local,
3170 drbd_free_bc(mdev->ldev);
3171 mdev->ldev = NULL;);
3172
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003173 clear_bit(GO_DISKLESS, &mdev->flags);
3174}
3175
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003176static int w_go_diskless(struct drbd_work *w, int unused)
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003177{
Philipp Reisner00d56942011-02-09 18:09:48 +01003178 struct drbd_conf *mdev = w->mdev;
3179
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003180 D_ASSERT(mdev->state.disk == D_FAILED);
Lars Ellenberg9d282872010-10-14 13:57:07 +02003181 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
3182 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003183 * the protected members anymore, though, so once put_ldev reaches zero
3184 * again, it will be safe to free them. */
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003185 drbd_force_state(mdev, NS(disk, D_DISKLESS));
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003186 return 0;
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003187}
3188
3189void drbd_go_diskless(struct drbd_conf *mdev)
3190{
3191 D_ASSERT(mdev->state.disk == D_FAILED);
3192 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
Philipp Reisnere42325a2011-01-19 13:55:45 +01003193 drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003194}
3195
Philipp Reisnerb411b362009-09-25 16:07:19 -07003196/**
3197 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
3198 * @mdev: DRBD device.
3199 * @io_fn: IO callback to be called when bitmap IO is possible
3200 * @done: callback to be called after the bitmap IO was performed
3201 * @why: Descriptive text of the reason for doing the IO
3202 *
 3203 * While IO on the bitmap happens we freeze application IO, thus ensuring
 3204 * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
3205 * called from worker context. It MUST NOT be used while a previous such
3206 * work is still pending!
3207 */
3208void drbd_queue_bitmap_io(struct drbd_conf *mdev,
3209 int (*io_fn)(struct drbd_conf *),
3210 void (*done)(struct drbd_conf *, int),
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003211 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003213 D_ASSERT(current == mdev->tconn->worker.task);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003214
3215 D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
3216 D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
3217 D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
3218 if (mdev->bm_io_work.why)
3219 dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
3220 why, mdev->bm_io_work.why);
3221
3222 mdev->bm_io_work.io_fn = io_fn;
3223 mdev->bm_io_work.done = done;
3224 mdev->bm_io_work.why = why;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003225 mdev->bm_io_work.flags = flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226
Philipp Reisner87eeee42011-01-19 14:16:30 +01003227 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003228 set_bit(BITMAP_IO, &mdev->flags);
3229 if (atomic_read(&mdev->ap_bio_cnt) == 0) {
Philipp Reisner127b3172010-11-16 10:07:53 +01003230 if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
Philipp Reisnere42325a2011-01-19 13:55:45 +01003231 drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01003233 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003234}
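
/*
 * Usage sketch (illustrative; the done callback name is made up): from
 * worker context one would queue a full "set all bits and write out" pass
 * like this, using the io_fn defined above:
 *
 *	drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, my_done_fn,
 *			     "set_n_write from attaching", BM_LOCKED_MASK);
 *
 * Application IO is held off (BITMAP_IO is set under req_lock) until
 * w_bitmap_io() has run io_fn and cleared the bit again.
 */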
3235
3236/**
3237 * drbd_bitmap_io() - Does an IO operation on the whole bitmap
3238 * @mdev: DRBD device.
3239 * @io_fn: IO callback to be called when bitmap IO is possible
3240 * @why: Descriptive text of the reason for doing the IO
3241 *
 3242 * Freezes application IO while the actual IO operation runs. This
 3243 * function MAY NOT be called from worker context.
3244 */
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003245int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
3246 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003247{
3248 int rv;
3249
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003250 D_ASSERT(current != mdev->tconn->worker.task);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003251
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003252 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
3253 drbd_suspend_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003254
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003255 drbd_bm_lock(mdev, why, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003256 rv = io_fn(mdev);
3257 drbd_bm_unlock(mdev);
3258
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003259 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
3260 drbd_resume_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003261
3262 return rv;
3263}
3264
3265void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
3266{
3267 if ((mdev->ldev->md.flags & flag) != flag) {
3268 drbd_md_mark_dirty(mdev);
3269 mdev->ldev->md.flags |= flag;
3270 }
3271}
3272
3273void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
3274{
3275 if ((mdev->ldev->md.flags & flag) != 0) {
3276 drbd_md_mark_dirty(mdev);
3277 mdev->ldev->md.flags &= ~flag;
3278 }
3279}
3280int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
3281{
3282 return (bdev->md.flags & flag) != 0;
3283}
3284
3285static void md_sync_timer_fn(unsigned long data)
3286{
3287 struct drbd_conf *mdev = (struct drbd_conf *) data;
3288
Philipp Reisnere42325a2011-01-19 13:55:45 +01003289 drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003290}
3291
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003292static int w_md_sync(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003293{
Philipp Reisner00d56942011-02-09 18:09:48 +01003294 struct drbd_conf *mdev = w->mdev;
3295
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296 dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
Lars Ellenbergee15b032010-09-03 10:00:09 +02003297#ifdef DEBUG
3298 dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
3299 mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
3300#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07003301 drbd_md_sync(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01003302 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003303}
3304
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003305const char *cmdname(enum drbd_packet cmd)
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003306{
3307 /* THINK may need to become several global tables
3308 * when we want to support more than
3309 * one PRO_VERSION */
3310 static const char *cmdnames[] = {
3311 [P_DATA] = "Data",
3312 [P_DATA_REPLY] = "DataReply",
3313 [P_RS_DATA_REPLY] = "RSDataReply",
3314 [P_BARRIER] = "Barrier",
3315 [P_BITMAP] = "ReportBitMap",
3316 [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget",
3317 [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource",
3318 [P_UNPLUG_REMOTE] = "UnplugRemote",
3319 [P_DATA_REQUEST] = "DataRequest",
3320 [P_RS_DATA_REQUEST] = "RSDataRequest",
3321 [P_SYNC_PARAM] = "SyncParam",
3322 [P_SYNC_PARAM89] = "SyncParam89",
3323 [P_PROTOCOL] = "ReportProtocol",
3324 [P_UUIDS] = "ReportUUIDs",
3325 [P_SIZES] = "ReportSizes",
3326 [P_STATE] = "ReportState",
3327 [P_SYNC_UUID] = "ReportSyncUUID",
3328 [P_AUTH_CHALLENGE] = "AuthChallenge",
3329 [P_AUTH_RESPONSE] = "AuthResponse",
3330 [P_PING] = "Ping",
3331 [P_PING_ACK] = "PingAck",
3332 [P_RECV_ACK] = "RecvAck",
3333 [P_WRITE_ACK] = "WriteAck",
3334 [P_RS_WRITE_ACK] = "RSWriteAck",
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003335 [P_DISCARD_WRITE] = "DiscardWrite",
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003336 [P_NEG_ACK] = "NegAck",
3337 [P_NEG_DREPLY] = "NegDReply",
3338 [P_NEG_RS_DREPLY] = "NegRSDReply",
3339 [P_BARRIER_ACK] = "BarrierAck",
3340 [P_STATE_CHG_REQ] = "StateChgRequest",
3341 [P_STATE_CHG_REPLY] = "StateChgReply",
3342 [P_OV_REQUEST] = "OVRequest",
3343 [P_OV_REPLY] = "OVReply",
3344 [P_OV_RESULT] = "OVResult",
3345 [P_CSUM_RS_REQUEST] = "CsumRSRequest",
3346 [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
3347 [P_COMPRESSED_BITMAP] = "CBitmap",
3348 [P_DELAY_PROBE] = "DelayProbe",
3349 [P_OUT_OF_SYNC] = "OutOfSync",
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003350 [P_RETRY_WRITE] = "RetryWrite",
Lars Ellenbergae25b332011-04-24 00:01:16 +02003351 [P_RS_CANCEL] = "RSCancel",
3352 [P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
3353 [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
Philipp Reisner036b17e2011-05-16 17:38:11 +02003354		[P_RETRY_WRITE]		= "retry_write", /* duplicate index; this initializer overrides "RetryWrite" above */
3355 [P_PROTOCOL_UPDATE] = "protocol_update",
Lars Ellenbergae25b332011-04-24 00:01:16 +02003356
3357 /* enum drbd_packet, but not commands - obsoleted flags:
3358 * P_MAY_IGNORE
3359 * P_MAX_OPT_CMD
3360 */
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003361 };
3362
Lars Ellenbergae25b332011-04-24 00:01:16 +02003363 /* too big for the array: 0xfffX */
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +02003364 if (cmd == P_INITIAL_META)
3365 return "InitialMeta";
3366 if (cmd == P_INITIAL_DATA)
3367 return "InitialData";
Andreas Gruenbacher60381782011-03-28 17:05:50 +02003368 if (cmd == P_CONNECTION_FEATURES)
3369 return "ConnectionFeatures";
Andreas Gruenbacher6e849ce2011-03-14 17:27:45 +01003370 if (cmd >= ARRAY_SIZE(cmdnames))
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003371 return "Unknown";
3372 return cmdnames[cmd];
3373}
3374
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003375/**
3376 * drbd_wait_misc - wait for a request to make progress
3377 * @mdev: device associated with the request
3378 * @i: the struct drbd_interval embedded in struct drbd_request or
3379 * struct drbd_peer_request
 3380 *
 * Must be called with mdev->tconn->req_lock held; the lock is released
 * while sleeping and re-acquired before returning.
 */
3381int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i)
3382{
Philipp Reisner44ed1672011-04-19 17:10:19 +02003383 struct net_conf *nc;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003384 DEFINE_WAIT(wait);
3385 long timeout;
3386
Philipp Reisner44ed1672011-04-19 17:10:19 +02003387 rcu_read_lock();
3388 nc = rcu_dereference(mdev->tconn->net_conf);
3389 if (!nc) {
3390 rcu_read_unlock();
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003391 return -ETIMEDOUT;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003392 }
3393 timeout = nc->ko_count ? nc->timeout * HZ / 10 * nc->ko_count : MAX_SCHEDULE_TIMEOUT;
3394 rcu_read_unlock();
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003395
3396 /* Indicate to wake up mdev->misc_wait on progress. */
3397 i->waiting = true;
3398 prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE);
3399 spin_unlock_irq(&mdev->tconn->req_lock);
3400 timeout = schedule_timeout(timeout);
3401 finish_wait(&mdev->misc_wait, &wait);
3402 spin_lock_irq(&mdev->tconn->req_lock);
3403 if (!timeout || mdev->state.conn < C_CONNECTED)
3404 return -ETIMEDOUT;
3405 if (signal_pending(current))
3406 return -ERESTARTSYS;
3407 return 0;
3408}
3409
Philipp Reisnerb411b362009-09-25 16:07:19 -07003410#ifdef CONFIG_DRBD_FAULT_INJECTION
3411/* Fault insertion support including random number generator shamelessly
3412 * stolen from kernel/rcutorture.c */
3413struct fault_random_state {
3414 unsigned long state;
3415 unsigned long count;
3416};
3417
3418#define FAULT_RANDOM_MULT 39916801 /* prime */
3419#define FAULT_RANDOM_ADD 479001701 /* prime */
3420#define FAULT_RANDOM_REFRESH 10000
3421
3422/*
3423 * Crude but fast random-number generator. Uses a linear congruential
3424 * generator, with occasional help from get_random_bytes().
3425 */
3426static unsigned long
3427_drbd_fault_random(struct fault_random_state *rsp)
3428{
3429 long refresh;
3430
Roel Kluin49829ea2009-12-15 22:55:44 +01003431 if (!rsp->count--) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003432 get_random_bytes(&refresh, sizeof(refresh));
3433 rsp->state += refresh;
3434 rsp->count = FAULT_RANDOM_REFRESH;
3435 }
3436 rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
3437 return swahw32(rsp->state);
3438}
3439
3440static char *
3441_drbd_fault_str(unsigned int type) {
3442 static char *_faults[] = {
3443 [DRBD_FAULT_MD_WR] = "Meta-data write",
3444 [DRBD_FAULT_MD_RD] = "Meta-data read",
3445 [DRBD_FAULT_RS_WR] = "Resync write",
3446 [DRBD_FAULT_RS_RD] = "Resync read",
3447 [DRBD_FAULT_DT_WR] = "Data write",
3448 [DRBD_FAULT_DT_RD] = "Data read",
3449 [DRBD_FAULT_DT_RA] = "Data read ahead",
3450 [DRBD_FAULT_BM_ALLOC] = "BM allocation",
Philipp Reisner6b4388a2010-04-26 14:11:45 +02003451 [DRBD_FAULT_AL_EE] = "EE allocation",
3452 [DRBD_FAULT_RECEIVE] = "receive data corruption",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003453 };
3454
3455 return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
3456}
3457
3458unsigned int
3459_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
3460{
3461 static struct fault_random_state rrs = {0, 0};
3462
3463 unsigned int ret = (
3464 (fault_devs == 0 ||
3465 ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
3466 (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
3467
3468 if (ret) {
3469 fault_count++;
3470
Lars Ellenberg73835062010-05-27 11:51:56 +02003471 if (__ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003472 dev_warn(DEV, "***Simulating %s failure\n",
3473 _drbd_fault_str(type));
3474 }
3475
3476 return ret;
3477}
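
/*
 * Usage note (assumption: fault_rate, fault_devs and friends are exposed as
 * module parameters elsewhere in this file): loading the module with e.g.
 *
 *	modprobe drbd fault_rate=1 fault_devs=1
 *
 * would make roughly 1% of the fault-enabled operations fail, restricted to
 * minor 0, since fault_devs is matched against (1 << minor) above and
 * fault_rate is the percentage; fault_count above counts the injected faults.
 */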
3478#endif
3479
3480const char *drbd_buildtag(void)
3481{
 3482	/* DRBD built from external sources has a reference here to the
3483 git hash of the source code. */
3484
3485 static char buildtag[38] = "\0uilt-in";
3486
3487 if (buildtag[0] == 0) {
3488#ifdef CONFIG_MODULES
3489 if (THIS_MODULE != NULL)
3490 sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
3491 else
3492#endif
3493 buildtag[0] = 'b';
3494 }
3495
3496 return buildtag;
3497}
3498
3499module_init(drbd_init)
3500module_exit(drbd_cleanup)
3501
Philipp Reisnerb411b362009-09-25 16:07:19 -07003502EXPORT_SYMBOL(drbd_conn_str);
3503EXPORT_SYMBOL(drbd_role_str);
3504EXPORT_SYMBOL(drbd_disk_str);
3505EXPORT_SYMBOL(drbd_set_st_err_str);