/*
   drbd.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */

#include "drbd_vli.h"

static DEFINE_MUTEX(drbd_main_mutex);
int drbdd_init(struct drbd_thread *);
int drbd_worker(struct drbd_thread *);
int drbd_asender(struct drbd_thread *);

int drbd_init(void);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static int drbd_release(struct gendisk *gd, fmode_t mode);
static int w_md_sync(struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_work *w, int unused);

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* allow_open_on_secondary */
MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
 * this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(proc_details, int, 0644);

#ifdef CONFIG_DRBD_FAULT_INJECTION
int enable_faults;
int fault_rate;
static int fault_count;
int fault_devs;
/* bitmap of enabled faults */
module_param(enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param(fault_rate, int, 0664);
/* count of faults inserted */
module_param(fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param(fault_devs, int, 0644);
#endif

/* module parameter, defined */
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
int disable_sendpage;
int allow_oos;
int proc_details;	/* Detail level in proc drbd */

/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char usermode_helper[80] = "/sbin/drbdadm";

module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
struct idr minors;
struct list_head drbd_tconns;	/* list of struct drbd_tconn */
DEFINE_MUTEX(drbd_cfg_mutex);

struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache;	/* peer requests */
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;

/* I do not use a standard mempool, because:
   1) I want to hand out the pre-allocated objects first.
   2) I want to be able to interrupt sleeping allocation with a signal.
   Note: This is a single linked list, the next pointer is the private
	 member of struct page.
 */
struct page *drbd_pp_pool;
spinlock_t   drbd_pp_lock;
int          drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;

DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);

static const struct block_device_operations drbd_ops = {
	.owner =   THIS_MODULE,
	.open =    drbd_open,
	.release = drbd_release,
};

static void bio_destructor_drbd(struct bio *bio)
{
	bio_free(bio, drbd_md_io_bio_set);
}

struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
	struct bio *bio;

	if (!drbd_md_io_bio_set)
		return bio_alloc(gfp_mask, 1);

	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
	if (!bio)
		return NULL;
	bio->bi_destructor = bio_destructor_drbd;
	return bio;
}

#ifdef __CHECKER__
/* When checking with sparse, and this is an inline function, sparse will
   give tons of false positives. When this is a real function, sparse works.
 */
int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
{
	int io_allowed;

	atomic_inc(&mdev->local_cnt);
	io_allowed = (mdev->state.disk >= mins);
	if (!io_allowed) {
		if (atomic_dec_and_test(&mdev->local_cnt))
			wake_up(&mdev->misc_wait);
	}
	return io_allowed;
}

#endif

/**
 * DOC: The transfer log
 *
 * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
 * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail
 * of the list. There is always at least one &struct drbd_tl_epoch object.
 *
 * Each &struct drbd_tl_epoch has a circular double linked list of requests
 * attached.
 */
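
/*
 * Rough sketch of the layout described above (illustrative only, not a
 * definitive diagram of every field):
 *
 *   oldest_tle -> [epoch N] -> [epoch N+1] -> ... -> [epoch M] <- newest_tle
 *                  requests     requests              requests
 *
 * Each drbd_tl_epoch collects the write requests issued between two
 * P_BARRIER packets; tl_release() retires the oldest epoch once the
 * matching P_BARRIER_ACK has been received from the peer.
 */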
static int tl_init(struct drbd_tconn *tconn)
{
	struct drbd_tl_epoch *b;

	/* during device minor initialization, we may well use GFP_KERNEL */
	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
	if (!b)
		return 0;
	INIT_LIST_HEAD(&b->requests);
	INIT_LIST_HEAD(&b->w.list);
	b->next = NULL;
	b->br_number = 4711;
	b->n_writes = 0;
	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */

	tconn->oldest_tle = b;
	tconn->newest_tle = b;
	INIT_LIST_HEAD(&tconn->out_of_sequence_requests);

	return 1;
}

static void tl_cleanup(struct drbd_tconn *tconn)
{
	if (tconn->oldest_tle != tconn->newest_tle)
		conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n");
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n");
	kfree(tconn->oldest_tle);
	tconn->oldest_tle = NULL;
	kfree(tconn->unused_spare_tle);
	tconn->unused_spare_tle = NULL;
}

/**
 * _tl_add_barrier() - Adds a barrier to the transfer log
 * @tconn:	DRBD connection.
 * @new:	Barrier to be added before the current head of the TL.
 *
 * The caller must hold the req_lock.
 */
void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new)
{
	struct drbd_tl_epoch *newest_before;

	INIT_LIST_HEAD(&new->requests);
	INIT_LIST_HEAD(&new->w.list);
	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
	new->next = NULL;
	new->n_writes = 0;

	newest_before = tconn->newest_tle;
	/* never send a barrier number == 0, because that is special-cased
	 * when using TCQ for our write ordering code */
	new->br_number = (newest_before->br_number+1) ?: 1;
	if (tconn->newest_tle != new) {
		tconn->newest_tle->next = new;
		tconn->newest_tle = new;
	}
}

/**
 * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
 * @tconn:	DRBD connection.
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
 * &struct drbd_tl_epoch object, this function will cause a termination
 * of the connection.
 */
void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
		unsigned int set_size)
{
	struct drbd_conf *mdev;
	struct drbd_tl_epoch *b, *nob; /* next old barrier */
	struct list_head *le, *tle;
	struct drbd_request *r;

	spin_lock_irq(&tconn->req_lock);

	b = tconn->oldest_tle;

	/* first some paranoia code */
	if (b == NULL) {
		conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
			 barrier_nr);
		goto bail;
	}
	if (b->br_number != barrier_nr) {
		conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
			 barrier_nr, b->br_number);
		goto bail;
	}
	if (b->n_writes != set_size) {
		conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
			 barrier_nr, set_size, b->n_writes);
		goto bail;
	}

	/* Clean up list of requests processed during current epoch */
	list_for_each_safe(le, tle, &b->requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		_req_mod(r, BARRIER_ACKED);
	}
	/* There could be requests on the list waiting for completion
	   of the write to the local disk. To avoid corruptions of
	   slab's data structures we have to remove the list's head.

	   Also there could have been a barrier ack out of sequence, overtaking
	   the write acks - which would be a bug and violating write ordering.
	   To not deadlock in case we lose connection while such requests are
	   still pending, we need some way to find them for the
	   _req_mod(CONNECTION_LOST_WHILE_PENDING).

	   These have been list_move'd to the out_of_sequence_requests list in
	   _req_mod(, BARRIER_ACKED) above.
	   */
	list_del_init(&b->requests);
	mdev = b->w.mdev;

	nob = b->next;
	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		_tl_add_barrier(tconn, b);
		if (nob)
			tconn->oldest_tle = nob;
		/* if nob == NULL b was the only barrier, and becomes the new
		   barrier. Therefore tconn->oldest_tle points already to b */
	} else {
		D_ASSERT(nob != NULL);
		tconn->oldest_tle = nob;
		kfree(b);
	}

	spin_unlock_irq(&tconn->req_lock);
	dec_ap_pending(mdev);

	return;

bail:
	spin_unlock_irq(&tconn->req_lock);
	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}
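
/*
 * Worked example of the checks above (illustrative numbers only): if the
 * peer acks barrier #42 after 17 writes, the oldest epoch must have
 * br_number == 42 and n_writes == 17.  Any mismatch, or a missing epoch,
 * is treated as a protocol violation and the connection is forced into
 * C_PROTOCOL_ERROR via the bail: path.
 */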

/**
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @tconn:	DRBD connection.
 * @what:	The action/event to perform with all request objects
 *
 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
 * RESTART_FROZEN_DISK_IO.
 */
void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	struct drbd_tl_epoch *b, *tmp, **pn;
	struct list_head *le, *tle, carry_reads;
	struct drbd_request *req;
	int rv, n_writes, n_reads;

	b = tconn->oldest_tle;
	pn = &tconn->oldest_tle;
	while (b) {
		n_writes = 0;
		n_reads = 0;
		INIT_LIST_HEAD(&carry_reads);
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			rv = _req_mod(req, what);

			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
		}
		tmp = b->next;

		if (n_writes) {
			if (what == RESEND) {
				b->n_writes = n_writes;
				if (b->w.cb == NULL) {
					b->w.cb = w_send_barrier;
					inc_ap_pending(b->w.mdev);
					set_bit(CREATE_BARRIER, &b->w.mdev->flags);
				}

				drbd_queue_work(&tconn->data.work, &b->w);
			}
			pn = &b->next;
		} else {
			if (n_reads)
				list_add(&carry_reads, &b->requests);
			/* there could still be requests on that ring list,
			 * in case local io is still pending */
			list_del(&b->requests);

			/* dec_ap_pending corresponding to queue_barrier.
			 * the newest barrier may not have been queued yet,
			 * in which case w.cb is still NULL. */
			if (b->w.cb != NULL)
				dec_ap_pending(b->w.mdev);

			if (b == tconn->newest_tle) {
				/* recycle, but reinit! */
				if (tmp != NULL)
					conn_err(tconn, "ASSERT FAILED tmp == NULL");
				INIT_LIST_HEAD(&b->requests);
				list_splice(&carry_reads, &b->requests);
				INIT_LIST_HEAD(&b->w.list);
				b->w.cb = NULL;
				b->br_number = net_random();
				b->n_writes = 0;

				*pn = b;
				break;
			}
			*pn = tmp;
			kfree(b);
		}
		b = tmp;
		list_splice(&carry_reads, &b->requests);
	}
}


/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
 * @tconn:	DRBD connection.
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer log gets marked as out of sync. Called from the
 * receiver thread and the worker thread.
 */
void tl_clear(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	struct list_head *le, *tle;
	struct drbd_request *r;
	int vnr;

	spin_lock_irq(&tconn->req_lock);

	_tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);

	/* we expect this list to be empty. */
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n");

	/* but just in case, clean it up anyways! */
	list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		/* It would be nice to complete outside of spinlock.
		 * But this is easier for now. */
		_req_mod(r, CONNECTION_LOST_WHILE_PENDING);
	}

	/* ensure bit indicating barrier is required is clear */
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		clear_bit(CREATE_BARRIER, &mdev->flags);

	spin_unlock_irq(&tconn->req_lock);
}

void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	spin_lock_irq(&tconn->req_lock);
	_tl_restart(tconn, what);
	spin_unlock_irq(&tconn->req_lock);
}

static int drbd_thread_setup(void *arg)
{
	struct drbd_thread *thi = (struct drbd_thread *) arg;
	struct drbd_tconn *tconn = thi->tconn;
	unsigned long flags;
	int retval;

	snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
		 thi->name[0], thi->tconn->name);

restart:
	retval = thi->function(thi);

	spin_lock_irqsave(&thi->t_lock, flags);

	/* if the receiver has been "EXITING", the last thing it did
	 * was set the conn state to "StandAlone",
	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
	 * and receiver thread will be "started".
	 * drbd_thread_start needs to set "RESTARTING" in that case.
	 * t_state check and assignment needs to be within the same spinlock,
	 * so either thread_start sees EXITING, and can remap to RESTARTING,
	 * or thread_start sees NONE, and can proceed as normal.
	 */

	if (thi->t_state == RESTARTING) {
		conn_info(tconn, "Restarting %s thread\n", thi->name);
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		goto restart;
	}

	thi->task = NULL;
	thi->t_state = NONE;
	smp_mb();
	complete(&thi->stop);
	spin_unlock_irqrestore(&thi->t_lock, flags);

	conn_info(tconn, "Terminating %s\n", current->comm);

	/* Release mod reference taken when thread was started */
	module_put(THIS_MODULE);
	return retval;
}
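
/*
 * Rough summary of the drbd_thread state machine, as implemented by
 * drbd_thread_setup(), drbd_thread_start() and _drbd_thread_stop()
 * (a sketch derived from the code in this file, not an authoritative
 * state chart):
 *
 *   NONE       --drbd_thread_start()-------> RUNNING
 *   RUNNING    --_drbd_thread_stop()-------> EXITING (or RESTARTING on restart)
 *   EXITING    --drbd_thread_start()-------> RESTARTING
 *   RESTARTING --drbd_thread_setup() loop--> RUNNING (thread re-enters its function)
 *   otherwise  --thread function returns---> NONE
 *
 * All transitions happen under thi->t_lock so that concurrent start/stop
 * requests are resolved consistently.
 */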

static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi,
			     int (*func) (struct drbd_thread *), char *name)
{
	spin_lock_init(&thi->t_lock);
	thi->task    = NULL;
	thi->t_state = NONE;
	thi->function = func;
	thi->tconn = tconn;
	strncpy(thi->name, name, ARRAY_SIZE(thi->name));
}

int drbd_thread_start(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	struct task_struct *nt;
	unsigned long flags;

	/* is used from state engine doing drbd_thread_stop_nowait,
	 * while holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	switch (thi->t_state) {
	case NONE:
		conn_info(tconn, "Starting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);

		/* Get ref on module for thread - this is released when thread exits */
		if (!try_module_get(THIS_MODULE)) {
			conn_err(tconn, "Failed to get module reference in drbd_thread_start\n");
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return false;
		}

		init_completion(&thi->stop);
		thi->reset_cpu_mask = 1;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */

		nt = kthread_create(drbd_thread_setup, (void *) thi,
				    "drbd_%c_%s", thi->name[0], thi->tconn->name);

		if (IS_ERR(nt)) {
			conn_err(tconn, "Couldn't start thread\n");

			module_put(THIS_MODULE);
			return false;
		}
		spin_lock_irqsave(&thi->t_lock, flags);
		thi->task = nt;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		wake_up_process(nt);
		break;
	case EXITING:
		thi->t_state = RESTARTING;
		conn_info(tconn, "Restarting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);
		/* fall through */
	case RUNNING:
	case RESTARTING:
	default:
		spin_unlock_irqrestore(&thi->t_lock, flags);
		break;
	}

	return true;
}


void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
{
	unsigned long flags;

	enum drbd_thread_state ns = restart ? RESTARTING : EXITING;

	/* may be called from state engine, holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	if (thi->t_state == NONE) {
		spin_unlock_irqrestore(&thi->t_lock, flags);
		if (restart)
			drbd_thread_start(thi);
		return;
	}

	if (thi->t_state != ns) {
		if (thi->task == NULL) {
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return;
		}

		thi->t_state = ns;
		smp_mb();
		init_completion(&thi->stop);
		if (thi->task != current)
			force_sig(DRBD_SIGKILL, thi->task);
	}

	spin_unlock_irqrestore(&thi->t_lock, flags);

	if (wait)
		wait_for_completion(&thi->stop);
}

static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi =
		task == tconn->receiver.task ? &tconn->receiver :
		task == tconn->asender.task  ? &tconn->asender :
		task == tconn->worker.task   ? &tconn->worker : NULL;

	return thi;
}

char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi = drbd_task_to_thread(tconn, task);
	return thi ? thi->name : task->comm;
}

int conn_lowest_minor(struct drbd_tconn *tconn)
{
	int vnr = 0;
	struct drbd_conf *mdev;

	mdev = idr_get_next(&tconn->volumes, &vnr);
	if (!mdev)
		return -1;
	return mdev_to_minor(mdev);
}

#ifdef CONFIG_SMP
/**
 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
 * @tconn:	DRBD connection.
 *
 * Forces all threads of a device onto the same CPU. This is beneficial for
 * DRBD's performance. May be overwritten by user's configuration.
 */
void drbd_calc_cpu_mask(struct drbd_tconn *tconn)
{
	int ord, cpu;

	/* user override. */
	if (cpumask_weight(tconn->cpu_mask))
		return;

	ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask);
	for_each_online_cpu(cpu) {
		if (ord-- == 0) {
			cpumask_set_cpu(cpu, tconn->cpu_mask);
			return;
		}
	}
	/* should not be reached */
	cpumask_setall(tconn->cpu_mask);
}
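
/*
 * Example of the modulo logic above (illustrative numbers only): on a
 * 4-CPU machine, a connection whose lowest minor is 5 gets
 * ord = 5 % 4 = 1, so its DRBD threads are all pinned to the second
 * online CPU - unless the user already configured a cpu_mask, in which
 * case the function returns early and leaves it untouched.
 */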

/**
 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
 * @thi:	drbd_thread object
 *
 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
 * prematurely.
 */
void drbd_thread_current_set_cpu(struct drbd_thread *thi)
{
	struct task_struct *p = current;

	if (!thi->reset_cpu_mask)
		return;
	thi->reset_cpu_mask = 0;
	set_cpus_allowed_ptr(p, thi->tconn->cpu_mask);
}
#endif

static void prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be32(DRBD_MAGIC);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be16(size);
}

static void prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be32(size);
}

static void _prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header *h,
			    enum drbd_packet cmd, int size)
{
	if (tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET)
		prepare_header95(&h->h95, cmd, size);
	else
		prepare_header80(&h->h80, cmd, size);
}
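
/*
 * On-the-wire header layouts as filled in above (field widths taken from
 * the cpu_to_be*() conversions; a sketch, byte counts only):
 *
 *   p_header80: 32-bit magic (DRBD_MAGIC),     16-bit command, 16-bit length
 *   p_header95: 16-bit magic (DRBD_MAGIC_BIG), 16-bit command, 32-bit length
 *
 * The 95 variant is chosen for peers speaking protocol version >= 100, or
 * whenever the payload exceeds DRBD_MAX_SIZE_H80_PACKET and thus would not
 * fit into the smaller length field of the 80 header.
 */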

static void prepare_header(struct drbd_conf *mdev, struct p_header *h,
			   enum drbd_packet cmd, int size)
{
	_prepare_header(mdev->tconn, mdev->vnr, h, cmd, size);
}

/* the appropriate socket mutex must be held already */
int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock,
		   enum drbd_packet cmd, struct p_header *h, size_t size,
		   unsigned msg_flags)
{
	int err;

	_prepare_header(tconn, vnr, h, cmd, size - sizeof(struct p_header));
	err = drbd_send_all(tconn, sock, h, size, msg_flags);
	if (err && !signal_pending(current))
		conn_warn(tconn, "short send %s size=%d\n",
			  cmdname(cmd), (int)size);
	return err;
}

/* don't pass the socket. we may only look at it
 * when we hold the appropriate socket mutex.
 */
int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket,
		  enum drbd_packet cmd, struct p_header *h, size_t size)
{
	int ok = 0;
	struct socket *sock;

	if (use_data_socket) {
		mutex_lock(&tconn->data.mutex);
		sock = tconn->data.socket;
	} else {
		mutex_lock(&tconn->meta.mutex);
		sock = tconn->meta.socket;
	}

	/* drbd_disconnect() could have called drbd_free_sock()
	 * while we were waiting in down()... */
	if (likely(sock != NULL))
		ok = !_conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0);

	if (use_data_socket)
		mutex_unlock(&tconn->data.mutex);
	else
		mutex_unlock(&tconn->meta.mutex);
	return ok;
}

int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data,
		   size_t size)
{
	struct p_header80 h;
	int err;

	prepare_header80(&h, cmd, size);
	err = drbd_get_data_sock(tconn);
	if (!err) {
		err = drbd_send_all(tconn, tconn->data.socket, &h, sizeof(h), 0);
		if (!err)
			err = drbd_send_all(tconn, tconn->data.socket, data, size, 0);
		drbd_put_data_sock(tconn);
	}
	return err;
}

int drbd_send_sync_param(struct drbd_conf *mdev)
{
	struct p_rs_param_95 *p;
	struct socket *sock;
	int size, rv;
	const int apv = mdev->tconn->agreed_pro_version;

	size = apv <= 87 ? sizeof(struct p_rs_param)
		: apv == 88 ? sizeof(struct p_rs_param)
			+ strlen(mdev->tconn->net_conf->verify_alg) + 1
		: apv <= 94 ? sizeof(struct p_rs_param_89)
		: /* apv >= 95 */ sizeof(struct p_rs_param_95);

	/* used from admin command context and receiver/worker context.
	 * to avoid kmalloc, grab the socket right here,
	 * then use the pre-allocated sbuf there */
	mutex_lock(&mdev->tconn->data.mutex);
	sock = mdev->tconn->data.socket;

	if (likely(sock != NULL)) {
		enum drbd_packet cmd =
			apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;

		p = &mdev->tconn->data.sbuf.rs_param_95;

		/* initialize verify_alg and csums_alg */
		memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

		if (get_ldev(mdev)) {
			p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate);
			p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead);
			p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target);
			p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target);
			p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate);
			put_ldev(mdev);
		} else {
			p->rate = cpu_to_be32(DRBD_RATE_DEF);
			p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
			p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
			p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
			p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
		}

		if (apv >= 88)
			strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg);
		if (apv >= 89)
			strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg);

		rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
	} else
		rv = 0; /* not ok */

	mutex_unlock(&mdev->tconn->data.mutex);

	return rv;
}

int drbd_send_protocol(struct drbd_tconn *tconn)
{
	struct p_protocol *p;
	int size, cf, rv;

	size = sizeof(struct p_protocol);

	if (tconn->agreed_pro_version >= 87)
		size += strlen(tconn->net_conf->integrity_alg) + 1;

	/* we must not recurse into our own queue,
	 * as that is blocked during handshake */
	p = kmalloc(size, GFP_NOIO);
	if (p == NULL)
		return 0;

	p->protocol      = cpu_to_be32(tconn->net_conf->wire_protocol);
	p->after_sb_0p   = cpu_to_be32(tconn->net_conf->after_sb_0p);
	p->after_sb_1p   = cpu_to_be32(tconn->net_conf->after_sb_1p);
	p->after_sb_2p   = cpu_to_be32(tconn->net_conf->after_sb_2p);
	p->two_primaries = cpu_to_be32(tconn->net_conf->two_primaries);

	cf = 0;
	if (tconn->net_conf->want_lose)
		cf |= CF_WANT_LOSE;
	if (tconn->net_conf->dry_run) {
		if (tconn->agreed_pro_version >= 92)
			cf |= CF_DRY_RUN;
		else {
			conn_err(tconn, "--dry-run is not supported by peer");
			kfree(p);
			return -1;
		}
	}
	p->conn_flags    = cpu_to_be32(cf);

	if (tconn->agreed_pro_version >= 87)
		strcpy(p->integrity_alg, tconn->net_conf->integrity_alg);

	rv = !conn_send_cmd2(tconn, P_PROTOCOL, p->head.payload, size - sizeof(struct p_header));
	kfree(p);
	return rv;
}

int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
{
	struct p_uuids p;
	int i;

	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
		return 1;

	for (i = UI_CURRENT; i < UI_SIZE; i++)
		p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;

	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
	p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
	uuid_flags |= mdev->tconn->net_conf->want_lose ? 1 : 0;
	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
	p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);

	put_ldev(mdev);

	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, &p.head, sizeof(p));
}

int drbd_send_uuids(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 0);
}

int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 8);
}
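
/*
 * The uuid_flags bits sent in UI_FLAGS, as assembled in _drbd_send_uuids()
 * and its callers (bit 8 inferred from drbd_send_uuids_skip_initial_sync()):
 *
 *   1  this node has the discard-my-data ("want_lose") option set
 *   2  this node was a crashed primary
 *   4  the (new) local disk state is D_INCONSISTENT
 *   8  presumably tells the peer to skip the initial sync
 */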

void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
{
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		u64 *uuid = mdev->ldev->md.uuid;
		dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
			 text,
			 (unsigned long long)uuid[UI_CURRENT],
			 (unsigned long long)uuid[UI_BITMAP],
			 (unsigned long long)uuid[UI_HISTORY_START],
			 (unsigned long long)uuid[UI_HISTORY_END]);
		put_ldev(mdev);
	} else {
		dev_info(DEV, "%s effective data uuid: %016llX\n",
			 text,
			 (unsigned long long)mdev->ed_uuid);
	}
}

int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
{
	struct p_rs_uuid p;
	u64 uuid;

	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);

	uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
	drbd_uuid_set(mdev, UI_BITMAP, uuid);
	drbd_print_uuids(mdev, "updated sync UUID");
	drbd_md_sync(mdev);
	p.uuid = cpu_to_be64(uuid);

	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, &p.head, sizeof(p));
}

int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
{
	struct p_sizes p;
	sector_t d_size, u_size;
	int q_order_type, max_bio_size;
	int ok;

	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		D_ASSERT(mdev->ldev->backing_bdev);
		d_size = drbd_get_max_capacity(mdev->ldev);
		u_size = mdev->ldev->dc.disk_size;
		q_order_type = drbd_queue_order_type(mdev);
		max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
		put_ldev(mdev);
	} else {
		d_size = 0;
		u_size = 0;
		q_order_type = QUEUE_ORDERED_NONE;
		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
	}

	p.d_size = cpu_to_be64(d_size);
	p.u_size = cpu_to_be64(u_size);
	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
	p.max_bio_size = cpu_to_be32(max_bio_size);
	p.queue_order_type = cpu_to_be16(q_order_type);
	p.dds_flags = cpu_to_be16(flags);

	ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, &p.head, sizeof(p));
	return ok;
}
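
/*
 * Note on trigger_reply (an interpretation of the code above, not taken
 * from the original comments): when trigger_reply is set, c_size is sent
 * as 0, so the peer sees no current-capacity claim and is expected to
 * answer with its own P_SIZES.  max_bio_size is capped to what the local
 * backing queue accepts, so the peer does not submit larger requests.
 */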

/**
 * drbd_send_state() - Sends the drbd state to the peer
 * @mdev:	DRBD device.
 */
int drbd_send_state(struct drbd_conf *mdev)
{
	struct socket *sock;
	struct p_state p;
	int ok = 0;

	mutex_lock(&mdev->tconn->data.mutex);

	p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
	sock = mdev->tconn->data.socket;

	if (likely(sock != NULL)) {
		ok = _drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0);
	}

	mutex_unlock(&mdev->tconn->data.mutex);

	return ok;
}

int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd,
			 union drbd_state mask, union drbd_state val)
{
	struct p_req_state p;

	p.mask    = cpu_to_be32(mask.i);
	p.val     = cpu_to_be32(val.i);

	return conn_send_cmd(tconn, vnr, USE_DATA_SOCKET, cmd, &p.head, sizeof(p));
}

int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
{
	struct p_req_state_reply p;

	p.retcode    = cpu_to_be32(retcode);

	return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, &p.head, sizeof(p));
}

int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode)
{
	struct p_req_state_reply p;
	enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY;

	p.retcode    = cpu_to_be32(retcode);

	return conn_send_cmd(tconn, 0, USE_META_SOCKET, cmd, &p.head, sizeof(p));
}

int fill_bitmap_rle_bits(struct drbd_conf *mdev,
			 struct p_compressed_bm *p,
			 struct bm_xfer_ctx *c)
{
	struct bitstream bs;
	unsigned long plain_bits;
	unsigned long tmp;
	unsigned long rl;
	unsigned len;
	unsigned toggle;
	int bits;

	/* may we use this feature? */
	if ((mdev->tconn->net_conf->use_rle == 0) ||
		(mdev->tconn->agreed_pro_version < 90))
			return 0;

	if (c->bit_offset >= c->bm_bits)
		return 0; /* nothing to do. */

	/* use at most this many bytes */
	bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
	memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
	/* plain bits covered in this code string */
	plain_bits = 0;

	/* p->encoding & 0x80 stores whether the first run length is set.
	 * bit offset is implicit.
	 * start with toggle == 2 to be able to tell the first iteration */
	toggle = 2;

	/* see how many plain bits we can stuff into one packet
	 * using RLE and VLI. */
	do {
		tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
				    : _drbd_bm_find_next(mdev, c->bit_offset);
		if (tmp == -1UL)
			tmp = c->bm_bits;
		rl = tmp - c->bit_offset;

		if (toggle == 2) { /* first iteration */
			if (rl == 0) {
				/* the first checked bit was set,
				 * store start value, */
				DCBP_set_start(p, 1);
				/* but skip encoding of zero run length */
				toggle = !toggle;
				continue;
			}
			DCBP_set_start(p, 0);
		}

		/* paranoia: catch zero runlength.
		 * can only happen if bitmap is modified while we scan it. */
		if (rl == 0) {
			dev_err(DEV, "unexpected zero runlength while encoding bitmap "
			    "t:%u bo:%lu\n", toggle, c->bit_offset);
			return -1;
		}

		bits = vli_encode_bits(&bs, rl);
		if (bits == -ENOBUFS) /* buffer full */
			break;
		if (bits <= 0) {
			dev_err(DEV, "error while encoding bitmap: %d\n", bits);
			return 0;
		}

		toggle = !toggle;
		plain_bits += rl;
		c->bit_offset = tmp;
	} while (c->bit_offset < c->bm_bits);

	len = bs.cur.b - p->code + !!bs.cur.bit;

	if (plain_bits < (len << 3)) {
		/* incompressible with this method.
		 * we need to rewind both word and bit position. */
		c->bit_offset -= plain_bits;
		bm_xfer_ctx_bit_to_word_offset(c);
		c->bit_offset = c->word_offset * BITS_PER_LONG;
		return 0;
	}

	/* RLE + VLI was able to compress it just fine.
	 * update c->word_offset. */
	bm_xfer_ctx_bit_to_word_offset(c);

	/* store pad_bits */
	DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);

	return len;
}
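
/*
 * Worked example of the encoding above (illustrative numbers only): a
 * bitmap chunk consisting of 1000 clear bits, then 24 set bits, then
 * clear bits again is transmitted as the VLI-encoded run lengths
 * 1000, 24, ... plus a start flag in p->encoding that records whether the
 * first run is of set or cleared bits - instead of shipping the bits
 * themselves.  If the encoded stream needs at least as many bits as the
 * plain span it covers (plain_bits < len * 8), the chunk is rewound and
 * sent uncompressed instead.
 */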

/**
 * send_bitmap_rle_or_plain
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
send_bitmap_rle_or_plain(struct drbd_conf *mdev,
			 struct p_header *h, struct bm_xfer_ctx *c)
{
	struct p_compressed_bm *p = (void*)h;
	unsigned long num_words;
	int len;
	int ok;

	len = fill_bitmap_rle_bits(mdev, p, c);

	if (len < 0)
		return -EIO;

	if (len) {
		DCBP_set_code(p, RLE_VLI_Bits);
		ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_COMPRESSED_BITMAP, h,
			sizeof(*p) + len, 0);

		c->packets[0]++;
		c->bytes[0] += sizeof(*p) + len;

		if (c->bit_offset >= c->bm_bits)
			len = 0; /* DONE */
	} else {
		/* was not compressible.
		 * send a buffer full of plain text bits instead. */
		num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
		len = num_words * sizeof(long);
		if (len)
			drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
		ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP,
				   h, sizeof(struct p_header80) + len, 0);
		c->word_offset += num_words;
		c->bit_offset = c->word_offset * BITS_PER_LONG;

		c->packets[1]++;
		c->bytes[1] += sizeof(struct p_header80) + len;

		if (c->bit_offset > c->bm_bits)
			c->bit_offset = c->bm_bits;
	}
	if (ok) {
		if (len == 0) {
			INFO_bm_xfer_stats(mdev, "send", c);
			return 0;
		} else
			return 1;
	}
	return -EIO;
}

1189/* See the comment at receive_bitmap() */
1190int _drbd_send_bitmap(struct drbd_conf *mdev)
1191{
1192 struct bm_xfer_ctx c;
Philipp Reisnerc0129492011-01-19 16:58:16 +01001193 struct p_header *p;
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001194 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001196 if (!expect(mdev->bitmap))
1197 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001198
1199 /* maybe we should use some per thread scratch page,
1200 * and allocate that during initial device creation? */
Philipp Reisnerc0129492011-01-19 16:58:16 +01001201 p = (struct p_header *) __get_free_page(GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001202 if (!p) {
1203 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001204 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001205 }
1206
1207 if (get_ldev(mdev)) {
1208 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1209 dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
1210 drbd_bm_set_all(mdev);
1211 if (drbd_bm_write(mdev)) {
1212 /* write_bm did fail! Leave full sync flag set in Meta P_DATA
1213 * but otherwise process as per normal - need to tell other
1214 * side that a full resync is required! */
1215 dev_err(DEV, "Failed to write bitmap to disk!\n");
1216 } else {
1217 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
1218 drbd_md_sync(mdev);
1219 }
1220 }
1221 put_ldev(mdev);
1222 }
1223
1224 c = (struct bm_xfer_ctx) {
1225 .bm_bits = drbd_bm_bits(mdev),
1226 .bm_words = drbd_bm_words(mdev),
1227 };
1228
1229 do {
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001230 err = send_bitmap_rle_or_plain(mdev, p, &c);
1231 } while (err > 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232
1233 free_page((unsigned long) p);
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001234 return err == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001235}
1236
1237int drbd_send_bitmap(struct drbd_conf *mdev)
1238{
1239 int err;
1240
Andreas Gruenbacher11b0be22011-03-15 16:15:10 +01001241 if (drbd_get_data_sock(mdev->tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001242 return -1;
1243 err = !_drbd_send_bitmap(mdev);
Philipp Reisner61120872011-02-08 09:50:54 +01001244 drbd_put_data_sock(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001245 return err;
1246}
1247
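/**
 * drbd_send_b_ack() - Sends a P_BARRIER_ACK on the meta data socket
 * @mdev:	DRBD device.
 * @barrier_nr:	barrier number, passed through as-is (still in network byte order)
 * @set_size:	epoch size reported back to the peer, converted to big endian here
 */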
1248int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
1249{
1250 int ok;
1251 struct p_barrier_ack p;
1252
1253 p.barrier = barrier_nr;
1254 p.set_size = cpu_to_be32(set_size);
1255
1256 if (mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001257 return false;
Philipp Reisnerc0129492011-01-19 16:58:16 +01001258 ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, &p.head, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001259 return ok;
1260}
1261
1262/**
1263 * _drbd_send_ack() - Sends an ack packet
1264 * @mdev: DRBD device.
1265 * @cmd: Packet command code.
1266 * @sector: sector, needs to be in big endian byte order
1267 * @blksize: size in bytes, needs to be in big endian byte order
1268 * @block_id: Id, big endian byte order
1269 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001270static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
1271 u64 sector, u32 blksize, u64 block_id)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001272{
1273 int ok;
1274 struct p_block_ack p;
1275
1276 p.sector = sector;
1277 p.block_id = block_id;
1278 p.blksize = blksize;
Andreas Gruenbacher8ccf2182011-02-24 11:35:43 +01001279 p.seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001280
Philipp Reisnere42325a2011-01-19 13:55:45 +01001281 if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001282 return false;
Philipp Reisnerc0129492011-01-19 16:58:16 +01001283 ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, &p.head, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001284 return ok;
1285}
1286
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001287/* dp->sector and dp->block_id already/still in network byte order,
1288 * data_size is payload size according to dp->head,
1289 * and may need to be corrected for digest size. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001290int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd,
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001291 struct p_data *dp, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001292{
Philipp Reisnera0638452011-01-19 14:31:32 +01001293 data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1294 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295 return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
1296 dp->block_id);
1297}
1298
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001299int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001300 struct p_block_req *rp)
1301{
1302 return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
1303}
1304
1305/**
1306 * drbd_send_ack() - Sends an ack packet
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001307 * @mdev: DRBD device
1308 * @cmd: packet command code
1309 * @peer_req: peer request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001310 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001311int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001312 struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001313{
1314 return _drbd_send_ack(mdev, cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001315 cpu_to_be64(peer_req->i.sector),
1316 cpu_to_be32(peer_req->i.size),
1317 peer_req->block_id);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001318}
1319
1320/* This function misuses the block_id field to signal if the blocks
1321 * are is sync or not. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001322int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001323 sector_t sector, int blksize, u64 block_id)
1324{
1325 return _drbd_send_ack(mdev, cmd,
1326 cpu_to_be64(sector),
1327 cpu_to_be32(blksize),
1328 cpu_to_be64(block_id));
1329}
1330
1331int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
1332 sector_t sector, int size, u64 block_id)
1333{
1334 int ok;
1335 struct p_block_req p;
1336
1337 p.sector = cpu_to_be64(sector);
1338 p.block_id = block_id;
1339 p.blksize = cpu_to_be32(size);
1340
Philipp Reisnerc0129492011-01-19 16:58:16 +01001341 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &p.head, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001342 return ok;
1343}
1344
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001345int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size,
1346 void *digest, int digest_size, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001347{
1348 int ok;
1349 struct p_block_req p;
1350
Philipp Reisnerfd340c12011-01-19 16:57:39 +01001351 prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001352 p.sector = cpu_to_be64(sector);
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +01001353 p.block_id = ID_SYNCER /* unused */;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001354 p.blksize = cpu_to_be32(size);
1355
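 /* Header and digest go out as two separate drbd_send() calls, so take the
  * data socket mutex directly to keep them back-to-back on the wire. */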
Philipp Reisnere42325a2011-01-19 13:55:45 +01001356 mutex_lock(&mdev->tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001357
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001358 ok = (sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), 0));
1359 ok = ok && (digest_size == drbd_send(mdev->tconn, mdev->tconn->data.socket, digest, digest_size, 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001360
Philipp Reisnere42325a2011-01-19 13:55:45 +01001361 mutex_unlock(&mdev->tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001362
1363 return ok;
1364}
1365
1366int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
1367{
1368 int ok;
1369 struct p_block_req p;
1370
1371 p.sector = cpu_to_be64(sector);
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +01001372 p.block_id = ID_SYNCER /* unused */;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001373 p.blksize = cpu_to_be32(size);
1374
Philipp Reisnerc0129492011-01-19 16:58:16 +01001375 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, &p.head, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001376 return ok;
1377}
1378
1379/* called on sndtimeo
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001380 * returns false if we should retry,
1381 * true if we think the connection is dead
Philipp Reisnerb411b362009-09-25 16:07:19 -07001382 */
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001383static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001384{
1385 int drop_it;
1386 /* long elapsed = (long)(jiffies - mdev->last_received); */
1387
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001388 drop_it = tconn->meta.socket == sock
1389 || !tconn->asender.task
1390 || get_t_state(&tconn->asender) != RUNNING
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001391 || tconn->cstate < C_WF_REPORT_PARAMS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001392
1393 if (drop_it)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001394 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001395
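 /* Not giving up yet: count down ko_count; while it is still non-zero,
  * log the expired send timeout and request a ping to probe the peer. */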
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001396 drop_it = !--tconn->ko_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001397 if (!drop_it) {
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001398 conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
1399 current->comm, current->pid, tconn->ko_count);
1400 request_ping(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001401 }
1402
1403 return drop_it; /* && (mdev->state == R_PRIMARY) */;
1404}
1405
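/* Flag the connection as congested once the data socket's send queue is more
 * than 4/5 full; drbd_congested() below reports this to the writeback code. */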
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001406static void drbd_update_congested(struct drbd_tconn *tconn)
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001407{
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001408 struct sock *sk = tconn->data.socket->sk;
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001409 if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001410 set_bit(NET_CONGESTED, &tconn->flags);
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001411}
1412
Philipp Reisnerb411b362009-09-25 16:07:19 -07001413/* The idea of sendpage seems to be to put some kind of reference
1414 * to the page into the skb, and to hand it over to the NIC. In
1415 * this process get_page() gets called.
1416 *
1417 * As soon as the page was really sent over the network put_page()
1418 * gets called by some part of the network layer. [ NIC driver? ]
1419 *
1420 * [ get_page() / put_page() increment/decrement the count. If count
1421 * reaches 0 the page will be freed. ]
1422 *
1423 * This works nicely with pages from FSs.
1424 * But this means that in protocol A we might signal IO completion too early!
1425 *
1426 * In order not to corrupt data during a resync we must make sure
1427 * that we do not reuse our own buffer pages (EEs) too early, therefore
1428 * we have the net_ee list.
1429 *
1430 * XFS seems to have problems, still, it submits pages with page_count == 0!
1431 * As a workaround, we disable sendpage on pages
1432 * with page_count == 0 or PageSlab.
1433 */
1434static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001435 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001436{
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001437 int sent = drbd_send(mdev->tconn, mdev->tconn->data.socket, kmap(page) + offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001438 kunmap(page);
1439 if (sent == size)
1440 mdev->send_cnt += size>>9;
1441 return sent == size;
1442}
1443
1444static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001445 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001446{
1447 mm_segment_t oldfs = get_fs();
1448 int sent, ok;
1449 int len = size;
1450
1451 /* e.g. XFS meta- & log-data is in slab pages, which have a
1452 * page_count of 0 and/or have PageSlab() set.
1453 * we cannot use send_page for those, as that does get_page();
1454 * put_page(); and would cause either a VM_BUG directly, or
1455 * __page_cache_release a page that would actually still be referenced
1456 * by someone, leading to some obscure delayed Oops somewhere else. */
1457 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001458 return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001459
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001460 msg_flags |= MSG_NOSIGNAL;
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001461 drbd_update_congested(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001462 set_fs(KERNEL_DS);
1463 do {
Philipp Reisnere42325a2011-01-19 13:55:45 +01001464 sent = mdev->tconn->data.socket->ops->sendpage(mdev->tconn->data.socket, page,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465 offset, len,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001466 msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001467 if (sent == -EAGAIN) {
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001468 if (we_should_drop_the_connection(mdev->tconn,
Philipp Reisnere42325a2011-01-19 13:55:45 +01001469 mdev->tconn->data.socket))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001470 break;
1471 else
1472 continue;
1473 }
1474 if (sent <= 0) {
1475 dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
1476 __func__, (int)size, len, sent);
1477 break;
1478 }
1479 len -= sent;
1480 offset += sent;
1481 } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
1482 set_fs(oldfs);
Philipp Reisner01a311a2011-02-07 14:30:33 +01001483 clear_bit(NET_CONGESTED, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001484
1485 ok = (len == 0);
1486 if (likely(ok))
1487 mdev->send_cnt += size>>9;
1488 return ok;
1489}
1490
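/* Send every page of a bio by copying it through the socket buffer; the
 * _zc_ variants below use sendpage() for zero-copy transmission instead. */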
1491static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
1492{
1493 struct bio_vec *bvec;
1494 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001495 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001496 __bio_for_each_segment(bvec, bio, i, 0) {
1497 if (!_drbd_no_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001498 bvec->bv_offset, bvec->bv_len,
1499 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001500 return 0;
1501 }
1502 return 1;
1503}
1504
1505static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
1506{
1507 struct bio_vec *bvec;
1508 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001509 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001510 __bio_for_each_segment(bvec, bio, i, 0) {
1511 if (!_drbd_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001512 bvec->bv_offset, bvec->bv_len,
1513 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001514 return 0;
1515 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001516 return 1;
1517}
1518
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001519static int _drbd_send_zc_ee(struct drbd_conf *mdev,
1520 struct drbd_peer_request *peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001521{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001522 struct page *page = peer_req->pages;
1523 unsigned len = peer_req->i.size;
1524
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001525 /* hint all but last page with MSG_MORE */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001526 page_chain_for_each(page) {
1527 unsigned l = min_t(unsigned, len, PAGE_SIZE);
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001528 if (!_drbd_send_page(mdev, page, 0, l,
1529 page_chain_next(page) ? MSG_MORE : 0))
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001530 return 0;
1531 len -= l;
1532 }
1533 return 1;
1534}
1535
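/* Translate the relevant bi_rw flags of a request bio into DP_* flags for the
 * wire. Peers with protocol version >= 95 understand FUA/FLUSH/DISCARD; older
 * peers only get the SYNC hint. */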
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001536static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
1537{
Philipp Reisner31890f42011-01-19 14:12:51 +01001538 if (mdev->tconn->agreed_pro_version >= 95)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001539 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001540 (bi_rw & REQ_FUA ? DP_FUA : 0) |
1541 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
1542 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
1543 else
Jens Axboe721a9602011-03-09 11:56:30 +01001544 return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001545}
1546
Philipp Reisnerb411b362009-09-25 16:07:19 -07001547/* Used to send write requests
1548 * R_PRIMARY -> Peer (P_DATA)
1549 */
1550int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
1551{
1552 int ok = 1;
1553 struct p_data p;
1554 unsigned int dp_flags = 0;
1555 void *dgb;
1556 int dgs;
1557
Andreas Gruenbacher11b0be22011-03-15 16:15:10 +01001558 if (drbd_get_data_sock(mdev->tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559 return 0;
1560
Philipp Reisnera0638452011-01-19 14:31:32 +01001561 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
1562 crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001563
Philipp Reisnerfd340c12011-01-19 16:57:39 +01001564 prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size);
Andreas Gruenbacherace652a2011-01-03 17:09:58 +01001565 p.sector = cpu_to_be64(req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566 p.block_id = (unsigned long)req;
Andreas Gruenbacher8ccf2182011-02-24 11:35:43 +01001567 p.seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001569 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
1570
Philipp Reisnerb411b362009-09-25 16:07:19 -07001571 if (mdev->state.conn >= C_SYNC_SOURCE &&
1572 mdev->state.conn <= C_PAUSED_SYNC_T)
1573 dp_flags |= DP_MAY_SET_IN_SYNC;
1574
1575 p.dp_flags = cpu_to_be32(dp_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001576 set_bit(UNPLUG_REMOTE, &mdev->flags);
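 /* Send the p_data header first; when a digest is configured, MSG_MORE
  * hints that it immediately follows on the same socket. */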
1577 ok = (sizeof(p) ==
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001578 drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001579 if (ok && dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001580 dgb = mdev->tconn->int_dig_out;
1581 drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, dgb);
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001582 ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001583 }
1584 if (ok) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001585 /* For protocol A, we have to memcpy the payload into
1586 * socket buffers, as we may complete right away
1587 * as soon as we handed it over to tcp, at which point the data
1588 * pages may become invalid.
1589 *
1590 * For data-integrity enabled, we copy it as well, so we can be
1591 * sure that even if the bio pages may still be modified, it
1592 * won't change the data on the wire, thus if the digest checks
1593 * out ok after sending on this side, but does not fit on the
1594 * receiving side, we sure have detected corruption elsewhere.
1595 */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001596 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || dgs)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001597 ok = _drbd_send_bio(mdev, req->master_bio);
1598 else
1599 ok = _drbd_send_zc_bio(mdev, req->master_bio);
Lars Ellenberg470be442010-11-10 10:36:52 +01001600
1601 /* double check digest, sometimes buffers have been modified in flight. */
1602 if (dgs > 0 && dgs <= 64) {
Bart Van Assche24c48302011-05-21 18:32:29 +02001603 /* 64 byte, 512 bit, is the largest digest size
Lars Ellenberg470be442010-11-10 10:36:52 +01001604 * currently supported in kernel crypto. */
1605 unsigned char digest[64];
Philipp Reisnera0638452011-01-19 14:31:32 +01001606 drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, digest);
1607 if (memcmp(mdev->tconn->int_dig_out, digest, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001608 dev_warn(DEV,
1609 "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
Andreas Gruenbacherace652a2011-01-03 17:09:58 +01001610 (unsigned long long)req->i.sector, req->i.size);
Lars Ellenberg470be442010-11-10 10:36:52 +01001611 }
1612 } /* else if (dgs > 64) {
1613 ... Be noisy about digest too large ...
1614 } */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001615 }
1616
Philipp Reisner61120872011-02-08 09:50:54 +01001617 drbd_put_data_sock(mdev->tconn);
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02001618
Philipp Reisnerb411b362009-09-25 16:07:19 -07001619 return ok;
1620}
1621
1622/* answer packet, used to send data back for read requests:
1623 * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
1624 * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
1625 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001626int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001627 struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628{
1629 int ok;
1630 struct p_data p;
1631 void *dgb;
1632 int dgs;
1633
Philipp Reisnera0638452011-01-19 14:31:32 +01001634 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
1635 crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001637 prepare_header(mdev, &p.head, cmd, sizeof(p) -
1638 sizeof(struct p_header80) +
1639 dgs + peer_req->i.size);
1640 p.sector = cpu_to_be64(peer_req->i.sector);
1641 p.block_id = peer_req->block_id;
Andreas Gruenbachercc378272011-01-26 18:01:50 +01001642 p.seq_num = 0; /* unused */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001643
1644 /* Only called by our kernel thread.
1645 * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
1646 * in response to admin command or module unload.
1647 */
Andreas Gruenbacher11b0be22011-03-15 16:15:10 +01001648 if (drbd_get_data_sock(mdev->tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649 return 0;
1650
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001651 ok = sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001652 if (ok && dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001653 dgb = mdev->tconn->int_dig_out;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001654 drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, dgb);
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001655 ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001656 }
1657 if (ok)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001658 ok = _drbd_send_zc_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001659
Philipp Reisner61120872011-02-08 09:50:54 +01001660 drbd_put_data_sock(mdev->tconn);
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02001661
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662 return ok;
1663}
1664
Philipp Reisner73a01a12010-10-27 14:33:00 +02001665int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
1666{
1667 struct p_block_desc p;
1668
Andreas Gruenbacherace652a2011-01-03 17:09:58 +01001669 p.sector = cpu_to_be64(req->i.sector);
1670 p.blksize = cpu_to_be32(req->i.size);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001671
1672 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
1673}
1674
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675/*
1676 drbd_send distinguishes two cases:
1677
1678 Packets sent via the data socket "sock"
1679 and packets sent via the meta data socket "msock"
1680
1681                    sock                      msock
1682 -----------------+-------------------------+------------------------------
1683 timeout           conf.timeout / 2          conf.timeout / 2
1684 timeout action    send a ping via msock     Abort communication
1685                                             and close all sockets
1686*/
1687
1688/*
1689 * you must have down()ed the appropriate [m]sock_mutex elsewhere!
1690 */
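/* Returns the number of bytes handed to the socket, which may be short when
 * the connection breaks, or -EBADR if no socket is available; see
 * drbd_send_all() below for an all-or-nothing wrapper. */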
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001691int drbd_send(struct drbd_tconn *tconn, struct socket *sock,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001692 void *buf, size_t size, unsigned msg_flags)
1693{
1694 struct kvec iov;
1695 struct msghdr msg;
1696 int rv, sent = 0;
1697
1698 if (!sock)
Andreas Gruenbacherc0d42c82010-12-09 23:52:22 +01001699 return -EBADR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001700
1701 /* THINK if (signal_pending) return ... ? */
1702
1703 iov.iov_base = buf;
1704 iov.iov_len = size;
1705
1706 msg.msg_name = NULL;
1707 msg.msg_namelen = 0;
1708 msg.msg_control = NULL;
1709 msg.msg_controllen = 0;
1710 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
1711
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001712 if (sock == tconn->data.socket) {
1713 tconn->ko_count = tconn->net_conf->ko_count;
1714 drbd_update_congested(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001715 }
1716 do {
1717 /* STRANGE
1718 * tcp_sendmsg does _not_ use its size parameter at all ?
1719 *
1720 * -EAGAIN on timeout, -EINTR on signal.
1721 */
1722/* THINK
1723 * do we need to block DRBD_SIG if sock == &meta.socket ??
1724 * otherwise wake_asender() might interrupt some send_*Ack !
1725 */
1726 rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
1727 if (rv == -EAGAIN) {
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001728 if (we_should_drop_the_connection(tconn, sock))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001729 break;
1730 else
1731 continue;
1732 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001733 if (rv == -EINTR) {
1734 flush_signals(current);
1735 rv = 0;
1736 }
1737 if (rv < 0)
1738 break;
1739 sent += rv;
1740 iov.iov_base += rv;
1741 iov.iov_len -= rv;
1742 } while (sent < size);
1743
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001744 if (sock == tconn->data.socket)
1745 clear_bit(NET_CONGESTED, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001746
1747 if (rv <= 0) {
1748 if (rv != -EAGAIN) {
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001749 conn_err(tconn, "%s_sendmsg returned %d\n",
1750 sock == tconn->meta.socket ? "msock" : "sock",
1751 rv);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001752 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001753 } else
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001754 conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001755 }
1756
1757 return sent;
1758}
1759
Andreas Gruenbacherfb708e42010-12-15 17:04:36 +01001760/**
1761 * drbd_send_all - Send an entire buffer
1762 *
1763 * Returns 0 upon success and a negative error value otherwise.
1764 */
1765int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer,
1766 size_t size, unsigned msg_flags)
1767{
1768 int err;
1769
1770 err = drbd_send(tconn, sock, buffer, size, msg_flags);
1771 if (err < 0)
1772 return err;
1773 if (err != size)
1774 return -EIO;
1775 return 0;
1776}
1777
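/* Block device open: writable opens are only allowed in the Primary role;
 * read-only opens of a Secondary require the allow_oos module parameter.
 * open_cnt is adjusted under the request lock to avoid racing with role
 * changes. */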
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778static int drbd_open(struct block_device *bdev, fmode_t mode)
1779{
1780 struct drbd_conf *mdev = bdev->bd_disk->private_data;
1781 unsigned long flags;
1782 int rv = 0;
1783
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001784 mutex_lock(&drbd_main_mutex);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001785 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001786 /* to have a stable mdev->state.role
1787 * and no race with updating open_cnt */
1788
1789 if (mdev->state.role != R_PRIMARY) {
1790 if (mode & FMODE_WRITE)
1791 rv = -EROFS;
1792 else if (!allow_oos)
1793 rv = -EMEDIUMTYPE;
1794 }
1795
1796 if (!rv)
1797 mdev->open_cnt++;
Philipp Reisner87eeee42011-01-19 14:16:30 +01001798 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001799 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800
1801 return rv;
1802}
1803
1804static int drbd_release(struct gendisk *gd, fmode_t mode)
1805{
1806 struct drbd_conf *mdev = gd->private_data;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001807 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808 mdev->open_cnt--;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001809 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001810 return 0;
1811}
1812
Philipp Reisnerb411b362009-09-25 16:07:19 -07001813static void drbd_set_defaults(struct drbd_conf *mdev)
1814{
Lars Ellenbergf3990022011-03-23 14:31:09 +01001815 /* Beware! The actual layout differs
1816 * between big endian and little endian */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001817 mdev->state = (union drbd_state) {
1818 { .role = R_SECONDARY,
1819 .peer = R_UNKNOWN,
1820 .conn = C_STANDALONE,
1821 .disk = D_DISKLESS,
1822 .pdsk = D_UNKNOWN,
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001823 .susp = 0,
1824 .susp_nod = 0,
1825 .susp_fen = 0
Philipp Reisnerb411b362009-09-25 16:07:19 -07001826 } };
1827}
1828
1829void drbd_init_set_defaults(struct drbd_conf *mdev)
1830{
1831 /* the memset(,0,) did most of this.
1832 * note: only assignments, no allocation in here */
1833
1834 drbd_set_defaults(mdev);
1835
Philipp Reisnerb411b362009-09-25 16:07:19 -07001836 atomic_set(&mdev->ap_bio_cnt, 0);
1837 atomic_set(&mdev->ap_pending_cnt, 0);
1838 atomic_set(&mdev->rs_pending_cnt, 0);
1839 atomic_set(&mdev->unacked_cnt, 0);
1840 atomic_set(&mdev->local_cnt, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001841 atomic_set(&mdev->pp_in_use, 0);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001842 atomic_set(&mdev->pp_in_use_by_net, 0);
Philipp Reisner778f2712010-07-06 11:14:00 +02001843 atomic_set(&mdev->rs_sect_in, 0);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001844 atomic_set(&mdev->rs_sect_ev, 0);
Philipp Reisner759fbdf2010-10-26 16:02:27 +02001845 atomic_set(&mdev->ap_in_flight, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001846
1847 mutex_init(&mdev->md_io_mutex);
Philipp Reisner8410da8f02011-02-11 20:11:10 +01001848 mutex_init(&mdev->own_state_mutex);
1849 mdev->state_mutex = &mdev->own_state_mutex;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001850
Philipp Reisnerb411b362009-09-25 16:07:19 -07001851 spin_lock_init(&mdev->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001852 spin_lock_init(&mdev->peer_seq_lock);
1853 spin_lock_init(&mdev->epoch_lock);
1854
1855 INIT_LIST_HEAD(&mdev->active_ee);
1856 INIT_LIST_HEAD(&mdev->sync_ee);
1857 INIT_LIST_HEAD(&mdev->done_ee);
1858 INIT_LIST_HEAD(&mdev->read_ee);
1859 INIT_LIST_HEAD(&mdev->net_ee);
1860 INIT_LIST_HEAD(&mdev->resync_reads);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001861 INIT_LIST_HEAD(&mdev->resync_work.list);
1862 INIT_LIST_HEAD(&mdev->unplug_work.list);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001863 INIT_LIST_HEAD(&mdev->go_diskless.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001864 INIT_LIST_HEAD(&mdev->md_sync_work.list);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02001865 INIT_LIST_HEAD(&mdev->start_resync_work.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001866 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
Philipp Reisner0ced55a2010-04-30 15:26:20 +02001867
Philipp Reisner794abb72010-12-27 11:51:23 +01001868 mdev->resync_work.cb = w_resync_timer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001869 mdev->unplug_work.cb = w_send_write_hint;
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001870 mdev->go_diskless.cb = w_go_diskless;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001871 mdev->md_sync_work.cb = w_md_sync;
1872 mdev->bm_io_work.w.cb = w_bitmap_io;
Philipp Reisner370a43e2011-01-14 16:03:11 +01001873 mdev->start_resync_work.cb = w_start_resync;
Philipp Reisnera21e9292011-02-08 15:08:49 +01001874
1875 mdev->resync_work.mdev = mdev;
1876 mdev->unplug_work.mdev = mdev;
1877 mdev->go_diskless.mdev = mdev;
1878 mdev->md_sync_work.mdev = mdev;
1879 mdev->bm_io_work.w.mdev = mdev;
1880 mdev->start_resync_work.mdev = mdev;
1881
Philipp Reisnerb411b362009-09-25 16:07:19 -07001882 init_timer(&mdev->resync_timer);
1883 init_timer(&mdev->md_sync_timer);
Philipp Reisner370a43e2011-01-14 16:03:11 +01001884 init_timer(&mdev->start_resync_timer);
Philipp Reisner7fde2be2011-03-01 11:08:28 +01001885 init_timer(&mdev->request_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001886 mdev->resync_timer.function = resync_timer_fn;
1887 mdev->resync_timer.data = (unsigned long) mdev;
1888 mdev->md_sync_timer.function = md_sync_timer_fn;
1889 mdev->md_sync_timer.data = (unsigned long) mdev;
Philipp Reisner370a43e2011-01-14 16:03:11 +01001890 mdev->start_resync_timer.function = start_resync_timer_fn;
1891 mdev->start_resync_timer.data = (unsigned long) mdev;
Philipp Reisner7fde2be2011-03-01 11:08:28 +01001892 mdev->request_timer.function = request_timer_fn;
1893 mdev->request_timer.data = (unsigned long) mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001894
1895 init_waitqueue_head(&mdev->misc_wait);
1896 init_waitqueue_head(&mdev->state_wait);
1897 init_waitqueue_head(&mdev->ee_wait);
1898 init_waitqueue_head(&mdev->al_wait);
1899 init_waitqueue_head(&mdev->seq_wait);
1900
Philipp Reisnerfd340c12011-01-19 16:57:39 +01001901 /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */
Philipp Reisner2451fc32010-08-24 13:43:11 +02001902 mdev->write_ordering = WO_bdev_flush;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001903 mdev->resync_wenr = LC_FREE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02001904 mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
1905 mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001906}
1907
1908void drbd_mdev_cleanup(struct drbd_conf *mdev)
1909{
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001910 int i;
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01001911 if (mdev->tconn->receiver.t_state != NONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001912 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01001913 mdev->tconn->receiver.t_state);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001914
1915 /* no need to lock it, I'm the only thread alive */
1916 if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
1917 dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
1918 mdev->al_writ_cnt =
1919 mdev->bm_writ_cnt =
1920 mdev->read_cnt =
1921 mdev->recv_cnt =
1922 mdev->send_cnt =
1923 mdev->writ_cnt =
1924 mdev->p_size =
1925 mdev->rs_start =
1926 mdev->rs_total =
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001927 mdev->rs_failed = 0;
1928 mdev->rs_last_events = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001929 mdev->rs_last_sect_ev = 0;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001930 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1931 mdev->rs_mark_left[i] = 0;
1932 mdev->rs_mark_time[i] = 0;
1933 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01001934 D_ASSERT(mdev->tconn->net_conf == NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001935
1936 drbd_set_my_capacity(mdev, 0);
1937 if (mdev->bitmap) {
1938 /* maybe never allocated. */
Philipp Reisner02d9a942010-03-24 16:23:03 +01001939 drbd_bm_resize(mdev, 0, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001940 drbd_bm_cleanup(mdev);
1941 }
1942
1943 drbd_free_resources(mdev);
Philipp Reisner07782862010-08-31 12:00:50 +02001944 clear_bit(AL_SUSPENDED, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001945
1946 /*
1947 * currently we drbd_init_ee only on module load, so
1948 * we may do drbd_release_ee only on module unload!
1949 */
1950 D_ASSERT(list_empty(&mdev->active_ee));
1951 D_ASSERT(list_empty(&mdev->sync_ee));
1952 D_ASSERT(list_empty(&mdev->done_ee));
1953 D_ASSERT(list_empty(&mdev->read_ee));
1954 D_ASSERT(list_empty(&mdev->net_ee));
1955 D_ASSERT(list_empty(&mdev->resync_reads));
Philipp Reisnere42325a2011-01-19 13:55:45 +01001956 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
1957 D_ASSERT(list_empty(&mdev->tconn->meta.work.q));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001958 D_ASSERT(list_empty(&mdev->resync_work.list));
1959 D_ASSERT(list_empty(&mdev->unplug_work.list));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001960 D_ASSERT(list_empty(&mdev->go_diskless.list));
Lars Ellenberg2265b472010-12-16 15:41:26 +01001961
1962 drbd_set_defaults(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001963}
1964
1965
1966static void drbd_destroy_mempools(void)
1967{
1968 struct page *page;
1969
1970 while (drbd_pp_pool) {
1971 page = drbd_pp_pool;
1972 drbd_pp_pool = (struct page *)page_private(page);
1973 __free_page(page);
1974 drbd_pp_vacant--;
1975 }
1976
1977 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
1978
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001979 if (drbd_md_io_bio_set)
1980 bioset_free(drbd_md_io_bio_set);
Lars Ellenberg35abf592011-02-23 12:39:46 +01001981 if (drbd_md_io_page_pool)
1982 mempool_destroy(drbd_md_io_page_pool);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001983 if (drbd_ee_mempool)
1984 mempool_destroy(drbd_ee_mempool);
1985 if (drbd_request_mempool)
1986 mempool_destroy(drbd_request_mempool);
1987 if (drbd_ee_cache)
1988 kmem_cache_destroy(drbd_ee_cache);
1989 if (drbd_request_cache)
1990 kmem_cache_destroy(drbd_request_cache);
1991 if (drbd_bm_ext_cache)
1992 kmem_cache_destroy(drbd_bm_ext_cache);
1993 if (drbd_al_ext_cache)
1994 kmem_cache_destroy(drbd_al_ext_cache);
1995
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001996 drbd_md_io_bio_set = NULL;
Lars Ellenberg35abf592011-02-23 12:39:46 +01001997 drbd_md_io_page_pool = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001998 drbd_ee_mempool = NULL;
1999 drbd_request_mempool = NULL;
2000 drbd_ee_cache = NULL;
2001 drbd_request_cache = NULL;
2002 drbd_bm_ext_cache = NULL;
2003 drbd_al_ext_cache = NULL;
2004
2005 return;
2006}
2007
2008static int drbd_create_mempools(void)
2009{
2010 struct page *page;
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002011 const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
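 /* enough pages to cover one maximally sized bio per configured minor */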
Philipp Reisnerb411b362009-09-25 16:07:19 -07002012 int i;
2013
2014 /* prepare our caches and mempools */
2015 drbd_request_mempool = NULL;
2016 drbd_ee_cache = NULL;
2017 drbd_request_cache = NULL;
2018 drbd_bm_ext_cache = NULL;
2019 drbd_al_ext_cache = NULL;
2020 drbd_pp_pool = NULL;
Lars Ellenberg35abf592011-02-23 12:39:46 +01002021 drbd_md_io_page_pool = NULL;
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002022 drbd_md_io_bio_set = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002023
2024 /* caches */
2025 drbd_request_cache = kmem_cache_create(
2026 "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
2027 if (drbd_request_cache == NULL)
2028 goto Enomem;
2029
2030 drbd_ee_cache = kmem_cache_create(
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01002031 "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002032 if (drbd_ee_cache == NULL)
2033 goto Enomem;
2034
2035 drbd_bm_ext_cache = kmem_cache_create(
2036 "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
2037 if (drbd_bm_ext_cache == NULL)
2038 goto Enomem;
2039
2040 drbd_al_ext_cache = kmem_cache_create(
2041 "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
2042 if (drbd_al_ext_cache == NULL)
2043 goto Enomem;
2044
2045 /* mempools */
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002046 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
2047 if (drbd_md_io_bio_set == NULL)
2048 goto Enomem;
2049
Lars Ellenberg35abf592011-02-23 12:39:46 +01002050 drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
2051 if (drbd_md_io_page_pool == NULL)
2052 goto Enomem;
2053
Philipp Reisnerb411b362009-09-25 16:07:19 -07002054 drbd_request_mempool = mempool_create(number,
2055 mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
2056 if (drbd_request_mempool == NULL)
2057 goto Enomem;
2058
2059 drbd_ee_mempool = mempool_create(number,
2060 mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
Nicolas Kaiser2027ae12010-10-28 06:15:26 -06002061 if (drbd_ee_mempool == NULL)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002062 goto Enomem;
2063
2064 /* drbd's page pool */
2065 spin_lock_init(&drbd_pp_lock);
2066
2067 for (i = 0; i < number; i++) {
2068 page = alloc_page(GFP_HIGHUSER);
2069 if (!page)
2070 goto Enomem;
2071 set_page_private(page, (unsigned long)drbd_pp_pool);
2072 drbd_pp_pool = page;
2073 }
2074 drbd_pp_vacant = number;
2075
2076 return 0;
2077
2078Enomem:
2079 drbd_destroy_mempools(); /* in case we allocated some */
2080 return -ENOMEM;
2081}
2082
2083static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
2084 void *unused)
2085{
2086 /* just so we have it. you never know what interesting things we
2087 * might want to do here some day...
2088 */
2089
2090 return NOTIFY_DONE;
2091}
2092
2093static struct notifier_block drbd_notifier = {
2094 .notifier_call = drbd_notify_sys,
2095};
2096
2097static void drbd_release_ee_lists(struct drbd_conf *mdev)
2098{
2099 int rr;
2100
2101 rr = drbd_release_ee(mdev, &mdev->active_ee);
2102 if (rr)
2103 dev_err(DEV, "%d EEs in active list found!\n", rr);
2104
2105 rr = drbd_release_ee(mdev, &mdev->sync_ee);
2106 if (rr)
2107 dev_err(DEV, "%d EEs in sync list found!\n", rr);
2108
2109 rr = drbd_release_ee(mdev, &mdev->read_ee);
2110 if (rr)
2111 dev_err(DEV, "%d EEs in read list found!\n", rr);
2112
2113 rr = drbd_release_ee(mdev, &mdev->done_ee);
2114 if (rr)
2115 dev_err(DEV, "%d EEs in done list found!\n", rr);
2116
2117 rr = drbd_release_ee(mdev, &mdev->net_ee);
2118 if (rr)
2119 dev_err(DEV, "%d EEs in net list found!\n", rr);
2120}
2121
Philipp Reisner774b3052011-02-22 02:07:03 -05002122/* caution. no locking. */
2123void drbd_delete_device(unsigned int minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002124{
2125 struct drbd_conf *mdev = minor_to_mdev(minor);
2126
2127 if (!mdev)
2128 return;
2129
Lars Ellenberg569083c2011-03-07 09:49:02 +01002130 idr_remove(&mdev->tconn->volumes, mdev->vnr);
2131 idr_remove(&minors, minor);
2132 synchronize_rcu();
Philipp Reisner774b3052011-02-22 02:07:03 -05002133
Philipp Reisnerb411b362009-09-25 16:07:19 -07002134 /* paranoia asserts */
Andreas Gruenbacher70dc65e2010-12-21 14:46:57 +01002135 D_ASSERT(mdev->open_cnt == 0);
Philipp Reisnere42325a2011-01-19 13:55:45 +01002136 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002137 /* end paranoia asserts */
2138
2139 del_gendisk(mdev->vdisk);
2140
2141 /* cleanup stuff that may have been allocated during
2142 * device (re-)configuration or state changes */
2143
2144 if (mdev->this_bdev)
2145 bdput(mdev->this_bdev);
2146
2147 drbd_free_resources(mdev);
2148
2149 drbd_release_ee_lists(mdev);
2150
Philipp Reisnerb411b362009-09-25 16:07:19 -07002151 lc_destroy(mdev->act_log);
2152 lc_destroy(mdev->resync);
2153
2154 kfree(mdev->p_uuid);
2155 /* mdev->p_uuid = NULL; */
2156
Philipp Reisnerb411b362009-09-25 16:07:19 -07002157 /* cleanup the rest that has been
2158 * allocated from drbd_new_device
2159 * and actually free the mdev itself */
2160 drbd_free_mdev(mdev);
2161}
2162
2163static void drbd_cleanup(void)
2164{
2165 unsigned int i;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002166 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002167
2168 unregister_reboot_notifier(&drbd_notifier);
2169
Lars Ellenberg17a93f32010-11-24 10:37:35 +01002170 /* first remove proc,
2171 * drbdsetup uses it's presence to detect
2172 * whether DRBD is loaded.
2173 * If we would get stuck in proc removal,
2174 * but have netlink already deregistered,
2175 * some drbdsetup commands may wait forever
2176 * for an answer.
2177 */
2178 if (drbd_proc)
2179 remove_proc_entry("drbd", NULL);
2180
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002181 drbd_genl_unregister();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002182
Philipp Reisner81a5d602011-02-22 19:53:16 -05002183 idr_for_each_entry(&minors, mdev, i)
2184 drbd_delete_device(i);
2185 drbd_destroy_mempools();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002186 unregister_blkdev(DRBD_MAJOR, "drbd");
2187
Philipp Reisner81a5d602011-02-22 19:53:16 -05002188 idr_destroy(&minors);
2189
Philipp Reisnerb411b362009-09-25 16:07:19 -07002190 printk(KERN_INFO "drbd: module cleanup done.\n");
2191}
2192
2193/**
2194 * drbd_congested() - Callback for pdflush
2195 * @congested_data: User data
2196 * @bdi_bits: Bits pdflush is currently interested in
2197 *
2198 * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
2199 */
2200static int drbd_congested(void *congested_data, int bdi_bits)
2201{
2202 struct drbd_conf *mdev = congested_data;
2203 struct request_queue *q;
2204 char reason = '-';
2205 int r = 0;
2206
Andreas Gruenbacher1b881ef2010-12-13 18:03:38 +01002207 if (!may_inc_ap_bio(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002208 /* DRBD has frozen IO */
2209 r = bdi_bits;
2210 reason = 'd';
2211 goto out;
2212 }
2213
2214 if (get_ldev(mdev)) {
2215 q = bdev_get_queue(mdev->ldev->backing_bdev);
2216 r = bdi_congested(&q->backing_dev_info, bdi_bits);
2217 put_ldev(mdev);
2218 if (r)
2219 reason = 'b';
2220 }
2221
Philipp Reisner01a311a2011-02-07 14:30:33 +01002222 if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002223 r |= (1 << BDI_async_congested);
2224 reason = reason == 'b' ? 'a' : 'n';
2225 }
2226
2227out:
2228 mdev->congestion_reason = reason;
2229 return r;
2230}
2231
Philipp Reisner6699b652011-02-09 11:10:24 +01002232static void drbd_init_workqueue(struct drbd_work_queue* wq)
2233{
2234 sema_init(&wq->s, 0);
2235 spin_lock_init(&wq->q_lock);
2236 INIT_LIST_HEAD(&wq->q);
2237}
2238
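/* Look up a connection (resource) by name; returns NULL if it does not exist.
 * The walk over drbd_tconns is serialized by drbd_cfg_mutex. */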
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002239struct drbd_tconn *conn_by_name(const char *name)
2240{
2241 struct drbd_tconn *tconn;
2242
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002243 if (!name || !name[0])
2244 return NULL;
2245
Lars Ellenberg543cc102011-03-10 22:18:18 +01002246 mutex_lock(&drbd_cfg_mutex);
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002247 list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
2248 if (!strcmp(tconn->name, name))
2249 goto found;
2250 }
2251 tconn = NULL;
2252found:
Lars Ellenberg543cc102011-03-10 22:18:18 +01002253 mutex_unlock(&drbd_cfg_mutex);
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002254 return tconn;
2255}
2256
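/* Allocate and initialize a new connection object: state, transfer log, work
 * queues and threads; on success it is appended to the global drbd_tconns
 * list under drbd_cfg_mutex. */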
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002257struct drbd_tconn *drbd_new_tconn(const char *name)
Philipp Reisner21114382011-01-19 12:26:59 +01002258{
2259 struct drbd_tconn *tconn;
2260
2261 tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL);
2262 if (!tconn)
2263 return NULL;
2264
2265 tconn->name = kstrdup(name, GFP_KERNEL);
2266 if (!tconn->name)
2267 goto fail;
2268
Philipp Reisner774b3052011-02-22 02:07:03 -05002269 if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
2270 goto fail;
2271
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01002272 if (!tl_init(tconn))
2273 goto fail;
2274
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01002275 tconn->cstate = C_STANDALONE;
Philipp Reisner8410da8f02011-02-11 20:11:10 +01002276 mutex_init(&tconn->cstate_mutex);
Philipp Reisner6699b652011-02-09 11:10:24 +01002277 spin_lock_init(&tconn->req_lock);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01002278 atomic_set(&tconn->net_cnt, 0);
2279 init_waitqueue_head(&tconn->net_cnt_wait);
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01002280 init_waitqueue_head(&tconn->ping_wait);
Philipp Reisner062e8792011-02-08 11:09:18 +01002281 idr_init(&tconn->volumes);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01002282
Philipp Reisner6699b652011-02-09 11:10:24 +01002283 drbd_init_workqueue(&tconn->data.work);
2284 mutex_init(&tconn->data.mutex);
2285
2286 drbd_init_workqueue(&tconn->meta.work);
2287 mutex_init(&tconn->meta.mutex);
2288
Philipp Reisner392c8802011-02-09 10:33:31 +01002289 drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver");
2290 drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
2291 drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
2292
Lars Ellenbergf3990022011-03-23 14:31:09 +01002293 tconn->res_opts = (struct res_opts) {
2294 {}, 0, /* cpu_mask */
2295 DRBD_ON_NO_DATA_DEF, /* on_no_data */
2296 };
2297
Lars Ellenberg543cc102011-03-10 22:18:18 +01002298 mutex_lock(&drbd_cfg_mutex);
2299 list_add_tail(&tconn->all_tconn, &drbd_tconns);
2300 mutex_unlock(&drbd_cfg_mutex);
Philipp Reisner21114382011-01-19 12:26:59 +01002301
2302 return tconn;
2303
2304fail:
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01002305 tl_cleanup(tconn);
Philipp Reisner774b3052011-02-22 02:07:03 -05002306 free_cpumask_var(tconn->cpu_mask);
Philipp Reisner21114382011-01-19 12:26:59 +01002307 kfree(tconn->name);
2308 kfree(tconn);
2309
2310 return NULL;
2311}
2312
2313void drbd_free_tconn(struct drbd_tconn *tconn)
2314{
Philipp Reisner21114382011-01-19 12:26:59 +01002315 list_del(&tconn->all_tconn);
Philipp Reisner062e8792011-02-08 11:09:18 +01002316 idr_destroy(&tconn->volumes);
Philipp Reisner21114382011-01-19 12:26:59 +01002317
Philipp Reisner774b3052011-02-22 02:07:03 -05002318 free_cpumask_var(tconn->cpu_mask);
Philipp Reisner21114382011-01-19 12:26:59 +01002319 kfree(tconn->name);
Philipp Reisnerb42a70a2011-01-27 10:55:20 +01002320 kfree(tconn->int_dig_out);
2321 kfree(tconn->int_dig_in);
2322 kfree(tconn->int_dig_vv);
Philipp Reisner21114382011-01-19 12:26:59 +01002323 kfree(tconn);
2324}
2325
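/* Create a new minor (volume) under an existing connection: allocates the
 * request queue, gendisk, metadata page and bitmap, registers the device in
 * both the global minors idr and the connection's volumes idr, and finally
 * makes it visible via add_disk(). */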
Philipp Reisner774b3052011-02-22 02:07:03 -05002326enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002327{
2328 struct drbd_conf *mdev;
2329 struct gendisk *disk;
2330 struct request_queue *q;
Philipp Reisner774b3052011-02-22 02:07:03 -05002331 int vnr_got = vnr;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002332 int minor_got = minor;
Lars Ellenberg8432b312011-03-08 16:11:16 +01002333 enum drbd_ret_code err = ERR_NOMEM;
Philipp Reisner774b3052011-02-22 02:07:03 -05002334
2335 mdev = minor_to_mdev(minor);
2336 if (mdev)
2337 return ERR_MINOR_EXISTS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002338
2339 /* GFP_KERNEL, we are outside of all write-out paths */
2340 mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
2341 if (!mdev)
Philipp Reisner774b3052011-02-22 02:07:03 -05002342 return ERR_NOMEM;
2343
2344 mdev->tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002345 mdev->minor = minor;
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002346 mdev->vnr = vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002347
2348 drbd_init_set_defaults(mdev);
2349
2350 q = blk_alloc_queue(GFP_KERNEL);
2351 if (!q)
2352 goto out_no_q;
2353 mdev->rq_queue = q;
2354 q->queuedata = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002355
2356 disk = alloc_disk(1);
2357 if (!disk)
2358 goto out_no_disk;
2359 mdev->vdisk = disk;
2360
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002361 set_disk_ro(disk, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002362
2363 disk->queue = q;
2364 disk->major = DRBD_MAJOR;
2365 disk->first_minor = minor;
2366 disk->fops = &drbd_ops;
2367 sprintf(disk->disk_name, "drbd%d", minor);
2368 disk->private_data = mdev;
2369
2370 mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
2371 /* we have no partitions. we contain only ourselves. */
2372 mdev->this_bdev->bd_contains = mdev->this_bdev;
2373
2374 q->backing_dev_info.congested_fn = drbd_congested;
2375 q->backing_dev_info.congested_data = mdev;
2376
Andreas Gruenbacher2f58dcf2010-12-13 17:48:19 +01002377 blk_queue_make_request(q, drbd_make_request);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002378 /* Setting the max_hw_sectors to an odd value of 8 KiB here;
2379 this triggers a max_bio_size message upon first attach or connect */
2380 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002381 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
2382 blk_queue_merge_bvec(q, drbd_merge_bvec);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002383 q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002384
2385 mdev->md_io_page = alloc_page(GFP_KERNEL);
2386 if (!mdev->md_io_page)
2387 goto out_no_io_page;
2388
2389 if (drbd_bm_init(mdev))
2390 goto out_no_bitmap;
Andreas Gruenbacherdac13892011-01-21 17:18:39 +01002391 mdev->read_requests = RB_ROOT;
Andreas Gruenbacherde696712011-01-20 15:00:24 +01002392 mdev->write_requests = RB_ROOT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002393
Philipp Reisnerb411b362009-09-25 16:07:19 -07002394 mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
2395 if (!mdev->current_epoch)
2396 goto out_no_epoch;
2397
2398 INIT_LIST_HEAD(&mdev->current_epoch->list);
2399 mdev->epochs = 1;
2400
Lars Ellenberg8432b312011-03-08 16:11:16 +01002401 if (!idr_pre_get(&minors, GFP_KERNEL))
2402 goto out_no_minor_idr;
2403 if (idr_get_new_above(&minors, mdev, minor, &minor_got))
2404 goto out_no_minor_idr;
2405 if (minor_got != minor) {
2406 err = ERR_MINOR_EXISTS;
2407 drbd_msg_put_info("requested minor exists already");
2408 goto out_idr_remove_minor;
Lars Ellenberg569083c2011-03-07 09:49:02 +01002409 }
2410
Lars Ellenberg8432b312011-03-08 16:11:16 +01002411 if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
Lars Ellenberg569083c2011-03-07 09:49:02 +01002412 goto out_idr_remove_minor;
Lars Ellenberg8432b312011-03-08 16:11:16 +01002413 if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got))
2414 goto out_idr_remove_minor;
2415 if (vnr_got != vnr) {
2416 err = ERR_INVALID_REQUEST;
2417 drbd_msg_put_info("requested volume exists already");
2418 goto out_idr_remove_vol;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002419 }
Philipp Reisner774b3052011-02-22 02:07:03 -05002420 add_disk(disk);
2421
Philipp Reisner2325eb62011-03-15 16:56:18 +01002422 /* inherit the connection state */
2423 mdev->state.conn = tconn->cstate;
2424 if (mdev->state.conn == C_WF_REPORT_PARAMS)
2425 drbd_connected(vnr, mdev, tconn);
2426
Philipp Reisner774b3052011-02-22 02:07:03 -05002427 return NO_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002428
Lars Ellenberg569083c2011-03-07 09:49:02 +01002429out_idr_remove_vol:
2430 idr_remove(&tconn->volumes, vnr_got);
Lars Ellenberg8432b312011-03-08 16:11:16 +01002431out_idr_remove_minor:
2432 idr_remove(&minors, minor_got);
Lars Ellenberg569083c2011-03-07 09:49:02 +01002433 synchronize_rcu();
Lars Ellenberg8432b312011-03-08 16:11:16 +01002434out_no_minor_idr:
Philipp Reisner81a5d602011-02-22 19:53:16 -05002435 kfree(mdev->current_epoch);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002436out_no_epoch:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002437 drbd_bm_cleanup(mdev);
2438out_no_bitmap:
2439 __free_page(mdev->md_io_page);
2440out_no_io_page:
2441 put_disk(disk);
2442out_no_disk:
2443 blk_cleanup_queue(q);
2444out_no_q:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002445 kfree(mdev);
Lars Ellenberg8432b312011-03-08 16:11:16 +01002446 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002447}
2448
2449/* counterpart of drbd_new_device.
2450 * last part of drbd_delete_device. */
2451void drbd_free_mdev(struct drbd_conf *mdev)
2452{
2453 kfree(mdev->current_epoch);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002454 if (mdev->bitmap) /* should no longer be there. */
2455 drbd_bm_cleanup(mdev);
2456 __free_page(mdev->md_io_page);
2457 put_disk(mdev->vdisk);
2458 blk_cleanup_queue(mdev->rq_queue);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002459 kfree(mdev);
2460}
2461
2462
2463int __init drbd_init(void)
2464{
2465 int err;
2466
Philipp Reisnerfd340c12011-01-19 16:57:39 +01002467 BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95));
2468 BUILD_BUG_ON(sizeof(struct p_handshake) != 80);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002469
Philipp Reisner2b8a90b2011-01-10 11:15:17 +01002470 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002471 printk(KERN_ERR
Philipp Reisner81a5d602011-02-22 19:53:16 -05002472 "drbd: invalid minor_count (%d)\n", minor_count);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002473#ifdef MODULE
2474 return -EINVAL;
2475#else
2476 minor_count = 8;
2477#endif
2478 }
2479
Philipp Reisnerb411b362009-09-25 16:07:19 -07002480 err = register_blkdev(DRBD_MAJOR, "drbd");
2481 if (err) {
2482 printk(KERN_ERR
2483 "drbd: unable to register block device major %d\n",
2484 DRBD_MAJOR);
2485 return err;
2486 }
2487
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002488 err = drbd_genl_register();
2489 if (err) {
2490 printk(KERN_ERR "drbd: unable to register generic netlink family\n");
2491 goto fail;
2492 }
2493
2494
Philipp Reisnerb411b362009-09-25 16:07:19 -07002495 register_reboot_notifier(&drbd_notifier);
2496
2497 /*
2498 * allocate all necessary structs
2499 */
2500 err = -ENOMEM;
2501
2502 init_waitqueue_head(&drbd_pp_wait);
2503
2504 drbd_proc = NULL; /* play safe for drbd_cleanup */
Philipp Reisner81a5d602011-02-22 19:53:16 -05002505 idr_init(&minors);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002506
2507 err = drbd_create_mempools();
2508 if (err)
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002509 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002510
Lars Ellenberg8c484ee2010-03-11 16:47:58 +01002511 drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002512 if (!drbd_proc) {
2513 printk(KERN_ERR "drbd: unable to register proc file\n");
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002514 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002515 }
2516
2517 rwlock_init(&global_state_lock);
Philipp Reisner21114382011-01-19 12:26:59 +01002518 INIT_LIST_HEAD(&drbd_tconns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002519
2520 printk(KERN_INFO "drbd: initialized. "
2521 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
2522 API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
2523 printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
2524 printk(KERN_INFO "drbd: registered as block device major %d\n",
2525 DRBD_MAJOR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002526
2527 return 0; /* Success! */
2528
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002529fail:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002530 drbd_cleanup();
2531 if (err == -ENOMEM)
2532 /* currently always the case */
2533 printk(KERN_ERR "drbd: ran out of memory\n");
2534 else
2535 printk(KERN_ERR "drbd: initialization failure\n");
2536 return err;
2537}
2538
2539void drbd_free_bc(struct drbd_backing_dev *ldev)
2540{
2541 if (ldev == NULL)
2542 return;
2543
Tejun Heoe525fd82010-11-13 11:55:17 +01002544 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2545 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002546
2547 kfree(ldev);
2548}
2549
Philipp Reisner360cc742011-02-08 14:29:53 +01002550void drbd_free_sock(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002551{
Philipp Reisner360cc742011-02-08 14:29:53 +01002552 if (tconn->data.socket) {
2553 mutex_lock(&tconn->data.mutex);
2554 kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR);
2555 sock_release(tconn->data.socket);
2556 tconn->data.socket = NULL;
2557 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002558 }
Philipp Reisner360cc742011-02-08 14:29:53 +01002559 if (tconn->meta.socket) {
2560 mutex_lock(&tconn->meta.mutex);
2561 kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR);
2562 sock_release(tconn->meta.socket);
2563 tconn->meta.socket = NULL;
2564 mutex_unlock(&tconn->meta.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002565 }
2566}
2567
2568
2569void drbd_free_resources(struct drbd_conf *mdev)
2570{
Lars Ellenbergf3990022011-03-23 14:31:09 +01002571 crypto_free_hash(mdev->tconn->csums_tfm);
2572 mdev->tconn->csums_tfm = NULL;
2573 crypto_free_hash(mdev->tconn->verify_tfm);
2574 mdev->tconn->verify_tfm = NULL;
Philipp Reisnera0638452011-01-19 14:31:32 +01002575 crypto_free_hash(mdev->tconn->cram_hmac_tfm);
2576 mdev->tconn->cram_hmac_tfm = NULL;
2577 crypto_free_hash(mdev->tconn->integrity_w_tfm);
2578 mdev->tconn->integrity_w_tfm = NULL;
2579 crypto_free_hash(mdev->tconn->integrity_r_tfm);
2580 mdev->tconn->integrity_r_tfm = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002581
Philipp Reisner360cc742011-02-08 14:29:53 +01002582 drbd_free_sock(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002583
2584 __no_warn(local,
2585 drbd_free_bc(mdev->ldev);
2586 mdev->ldev = NULL;);
2587}
2588
2589/* meta data management */
2590
2591struct meta_data_on_disk {
2592 u64 la_size; /* last agreed size. */
2593 u64 uuid[UI_SIZE]; /* UUIDs. */
2594 u64 device_uuid;
2595 u64 reserved_u64_1;
2596 u32 flags; /* MDF */
2597 u32 magic;
2598 u32 md_size_sect;
2599 u32 al_offset; /* offset to this block */
2600 u32 al_nr_extents; /* important for restoring the AL */
Lars Ellenbergf3990022011-03-23 14:31:09 +01002601 /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002602 u32 bm_offset; /* offset to the bitmap, from here */
2603 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
Philipp Reisner99432fc2011-05-20 16:39:13 +02002604 u32 la_peer_max_bio_size; /* last peer max_bio_size */
2605 u32 reserved_u32[3];
Philipp Reisnerb411b362009-09-25 16:07:19 -07002606
2607} __packed;
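/* Note (editorial): all fields are converted to big-endian before they hit
 * the disk (see the cpu_to_be32()/cpu_to_be64() calls in drbd_md_sync()
 * below); the buffer is zeroed over 512 bytes first, so unused space in the
 * super block sector reads back as zero. */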
2608
2609/**
2610 * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
2611 * @mdev: DRBD device.
2612 */
2613void drbd_md_sync(struct drbd_conf *mdev)
2614{
2615 struct meta_data_on_disk *buffer;
2616 sector_t sector;
2617 int i;
2618
Lars Ellenbergee15b032010-09-03 10:00:09 +02002619 del_timer(&mdev->md_sync_timer);
2620 /* timer may be rearmed by drbd_md_mark_dirty() now. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002621 if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
2622 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002623
2624	/* We use D_FAILED here, and not D_ATTACHING, because we try to write
2625	 * metadata even if we detach due to a disk failure! */
2626 if (!get_ldev_if_state(mdev, D_FAILED))
2627 return;
2628
Philipp Reisnerb411b362009-09-25 16:07:19 -07002629 mutex_lock(&mdev->md_io_mutex);
2630 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
2631 memset(buffer, 0, 512);
2632
2633 buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
2634 for (i = UI_CURRENT; i < UI_SIZE; i++)
2635 buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
2636 buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
2637 buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
2638
2639 buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
2640 buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
2641 buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
2642 buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
2643 buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
2644
2645 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002646 buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002647
2648 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
2649 sector = mdev->ldev->md.md_offset;
2650
Lars Ellenberg3f3a9b82010-09-01 15:12:12 +02002651 if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002652		/* this was a try anyway ... */
2653 dev_err(DEV, "meta data update failed!\n");
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002654 drbd_chk_io_error(mdev, 1, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002655 }
2656
2657 /* Update mdev->ldev->md.la_size_sect,
2658	 * since we just wrote it to the meta data. */
2659 mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
2660
2661 mutex_unlock(&mdev->md_io_mutex);
2662 put_ldev(mdev);
2663}
2664
2665/**
2666 * drbd_md_read() - Reads in the meta data super block
2667 * @mdev: DRBD device.
2668 * @bdev: Device from which the meta data should be read in.
2669 *
Andreas Gruenbacher116676c2010-12-08 13:33:11 +01002670 * Return NO_ERROR on success, or an enum drbd_ret_code in case
Philipp Reisnerb411b362009-09-25 16:07:19 -07002671 * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
2672 */
2673int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2674{
2675 struct meta_data_on_disk *buffer;
2676 int i, rv = NO_ERROR;
2677
2678 if (!get_ldev_if_state(mdev, D_ATTACHING))
2679 return ERR_IO_MD_DISK;
2680
Philipp Reisnerb411b362009-09-25 16:07:19 -07002681 mutex_lock(&mdev->md_io_mutex);
2682 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
2683
2684 if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002685 /* NOTE: can't do normal error processing here as this is
Philipp Reisnerb411b362009-09-25 16:07:19 -07002686 called BEFORE disk is attached */
2687 dev_err(DEV, "Error while reading metadata.\n");
2688 rv = ERR_IO_MD_DISK;
2689 goto err;
2690 }
2691
Andreas Gruenbachere7fad8a2011-01-11 13:54:02 +01002692 if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002693 dev_err(DEV, "Error while reading metadata, magic not found.\n");
2694 rv = ERR_MD_INVALID;
2695 goto err;
2696 }
2697 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
2698 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
2699 be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
2700 rv = ERR_MD_INVALID;
2701 goto err;
2702 }
2703 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
2704 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
2705 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
2706 rv = ERR_MD_INVALID;
2707 goto err;
2708 }
2709 if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
2710 dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
2711 be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
2712 rv = ERR_MD_INVALID;
2713 goto err;
2714 }
2715
2716 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
2717 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
2718 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
2719 rv = ERR_MD_INVALID;
2720 goto err;
2721 }
2722
2723 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
2724 for (i = UI_CURRENT; i < UI_SIZE; i++)
2725 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
2726 bdev->md.flags = be32_to_cpu(buffer->flags);
Lars Ellenbergf3990022011-03-23 14:31:09 +01002727 bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002728 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
2729
Philipp Reisner87eeee42011-01-19 14:16:30 +01002730 spin_lock_irq(&mdev->tconn->req_lock);
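	/* Editorial note: while not connected, restore the peer's last known
	 * max_bio_size from the meta data, but never let it drop below
	 * DRBD_MAX_BIO_SIZE_SAFE. */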
Philipp Reisner99432fc2011-05-20 16:39:13 +02002731 if (mdev->state.conn < C_CONNECTED) {
2732 int peer;
2733 peer = be32_to_cpu(buffer->la_peer_max_bio_size);
2734 peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
2735 mdev->peer_max_bio_size = peer;
2736 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01002737 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002738
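	/* Editorial note (assumption): an on-disk al_extents value below the
	 * minimum of 7 is treated as bogus and replaced with the default of 127. */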
Lars Ellenbergf3990022011-03-23 14:31:09 +01002739 if (bdev->dc.al_extents < 7)
2740 bdev->dc.al_extents = 127;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002741
2742 err:
2743 mutex_unlock(&mdev->md_io_mutex);
2744 put_ldev(mdev);
2745
2746 return rv;
2747}
2748
2749/**
2750 * drbd_md_mark_dirty() - Mark meta data super block as dirty
2751 * @mdev: DRBD device.
2752 *
2753 * Call this function if you change anything that should be written to
2754 * the meta-data super block. This function sets MD_DIRTY, and starts a
2755 * timer that ensures drbd_md_sync() gets called within five seconds.
2756 */
Lars Ellenbergca0e6092010-10-14 15:01:21 +02002757#ifdef DEBUG
Lars Ellenbergee15b032010-09-03 10:00:09 +02002758void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
2759{
2760 if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
2761 mod_timer(&mdev->md_sync_timer, jiffies + HZ);
2762 mdev->last_md_mark_dirty.line = line;
2763 mdev->last_md_mark_dirty.func = func;
2764 }
2765}
2766#else
Philipp Reisnerb411b362009-09-25 16:07:19 -07002767void drbd_md_mark_dirty(struct drbd_conf *mdev)
2768{
Lars Ellenbergee15b032010-09-03 10:00:09 +02002769 if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
Lars Ellenbergca0e6092010-10-14 15:01:21 +02002770 mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002771}
Lars Ellenbergee15b032010-09-03 10:00:09 +02002772#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07002773
2774static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
2775{
2776 int i;
2777
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002778 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002779 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
Philipp Reisnerb411b362009-09-25 16:07:19 -07002780}
2781
2782void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
2783{
2784 if (idx == UI_CURRENT) {
2785 if (mdev->state.role == R_PRIMARY)
2786 val |= 1;
2787 else
2788 val &= ~((u64)1);
2789
2790 drbd_set_ed_uuid(mdev, val);
2791 }
2792
2793 mdev->ldev->md.uuid[idx] = val;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002794 drbd_md_mark_dirty(mdev);
2795}
2796
2797
2798void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
2799{
2800 if (mdev->ldev->md.uuid[idx]) {
2801 drbd_uuid_move_history(mdev);
2802 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
Philipp Reisnerb411b362009-09-25 16:07:19 -07002803 }
2804 _drbd_uuid_set(mdev, idx, val);
2805}
2806
2807/**
2808 * drbd_uuid_new_current() - Creates a new current UUID
2809 * @mdev: DRBD device.
2810 *
2811 * Creates a new current UUID, and rotates the old current UUID into
2812 * the bitmap slot. Causes an incremental resync upon next connect.
2813 */
2814void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
2815{
2816 u64 val;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002817 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
Philipp Reisnerb411b362009-09-25 16:07:19 -07002818
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002819 if (bm_uuid)
2820 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
2821
Philipp Reisnerb411b362009-09-25 16:07:19 -07002822 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823
2824 get_random_bytes(&val, sizeof(u64));
2825 _drbd_uuid_set(mdev, UI_CURRENT, val);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002826 drbd_print_uuids(mdev, "new current UUID");
Lars Ellenbergaaa8e2b2010-10-15 13:16:53 +02002827 /* get it to stable storage _now_ */
2828 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002829}
2830
2831void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
2832{
2833 if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
2834 return;
2835
2836 if (val == 0) {
2837 drbd_uuid_move_history(mdev);
2838 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
2839 mdev->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002840 } else {
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002841 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
2842 if (bm_uuid)
2843 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002844
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002845 mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002846 }
2847 drbd_md_mark_dirty(mdev);
2848}
2849
2850/**
2851 * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
2852 * @mdev: DRBD device.
2853 *
2854 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
2855 */
2856int drbd_bmio_set_n_write(struct drbd_conf *mdev)
2857{
2858 int rv = -EIO;
2859
2860 if (get_ldev_if_state(mdev, D_ATTACHING)) {
2861 drbd_md_set_flag(mdev, MDF_FULL_SYNC);
2862 drbd_md_sync(mdev);
2863 drbd_bm_set_all(mdev);
2864
2865 rv = drbd_bm_write(mdev);
2866
2867 if (!rv) {
2868 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
2869 drbd_md_sync(mdev);
2870 }
2871
2872 put_ldev(mdev);
2873 }
2874
2875 return rv;
2876}
2877
2878/**
2879 * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
2880 * @mdev: DRBD device.
2881 *
2882 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
2883 */
2884int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
2885{
2886 int rv = -EIO;
2887
Philipp Reisner07782862010-08-31 12:00:50 +02002888 drbd_resume_al(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002889 if (get_ldev_if_state(mdev, D_ATTACHING)) {
2890 drbd_bm_clear_all(mdev);
2891 rv = drbd_bm_write(mdev);
2892 put_ldev(mdev);
2893 }
2894
2895 return rv;
2896}
2897
Philipp Reisner00d56942011-02-09 18:09:48 +01002898static int w_bitmap_io(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899{
2900 struct bm_io_work *work = container_of(w, struct bm_io_work, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01002901 struct drbd_conf *mdev = w->mdev;
Lars Ellenberg02851e92010-12-16 14:47:39 +01002902 int rv = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002903
2904 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
2905
Lars Ellenberg02851e92010-12-16 14:47:39 +01002906 if (get_ldev(mdev)) {
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002907 drbd_bm_lock(mdev, work->why, work->flags);
Lars Ellenberg02851e92010-12-16 14:47:39 +01002908 rv = work->io_fn(mdev);
2909 drbd_bm_unlock(mdev);
2910 put_ldev(mdev);
2911 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002912
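	/* Editorial note (assumption): clearing BITMAP_IO and waking misc_wait
	 * allows application IO that was held off during the bitmap IO to
	 * proceed again. */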
Lars Ellenberg4738fa12011-02-21 13:20:55 +01002913 clear_bit_unlock(BITMAP_IO, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002914 wake_up(&mdev->misc_wait);
2915
2916 if (work->done)
2917 work->done(mdev, rv);
2918
2919 clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
2920 work->why = NULL;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002921 work->flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002922
2923 return 1;
2924}
2925
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02002926void drbd_ldev_destroy(struct drbd_conf *mdev)
2927{
2928 lc_destroy(mdev->resync);
2929 mdev->resync = NULL;
2930 lc_destroy(mdev->act_log);
2931 mdev->act_log = NULL;
2932 __no_warn(local,
2933 drbd_free_bc(mdev->ldev);
2934 mdev->ldev = NULL;);
2935
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02002936 clear_bit(GO_DISKLESS, &mdev->flags);
2937}
2938
Philipp Reisner00d56942011-02-09 18:09:48 +01002939static int w_go_diskless(struct drbd_work *w, int unused)
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002940{
Philipp Reisner00d56942011-02-09 18:09:48 +01002941 struct drbd_conf *mdev = w->mdev;
2942
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002943 D_ASSERT(mdev->state.disk == D_FAILED);
Lars Ellenberg9d282872010-10-14 13:57:07 +02002944 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
2945 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02002946 * the protected members anymore, though, so once put_ldev reaches zero
2947 * again, it will be safe to free them. */
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002948 drbd_force_state(mdev, NS(disk, D_DISKLESS));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002949 return 1;
2950}
2951
2952void drbd_go_diskless(struct drbd_conf *mdev)
2953{
2954 D_ASSERT(mdev->state.disk == D_FAILED);
2955 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
Philipp Reisnere42325a2011-01-19 13:55:45 +01002956 drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02002957}
2958
Philipp Reisnerb411b362009-09-25 16:07:19 -07002959/**
2960 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
2961 * @mdev: DRBD device.
2962 * @io_fn: IO callback to be called when bitmap IO is possible
2963 * @done: callback to be called after the bitmap IO was performed
2964 * @why: Descriptive text of the reason for doing the IO
 * @flags: Bitmap locking flags (enum bm_flag), passed on to drbd_bm_lock()
2965 *
2966 * While IO on the bitmap happens we freeze application IO, thus ensuring
2967 * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
2968 * called from worker context. It MUST NOT be used while a previous such
2969 * work is still pending!
2970 */
2971void drbd_queue_bitmap_io(struct drbd_conf *mdev,
2972 int (*io_fn)(struct drbd_conf *),
2973 void (*done)(struct drbd_conf *, int),
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002974 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002975{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01002976 D_ASSERT(current == mdev->tconn->worker.task);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002977
2978 D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
2979 D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
2980 D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
2981 if (mdev->bm_io_work.why)
2982 dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
2983 why, mdev->bm_io_work.why);
2984
2985 mdev->bm_io_work.io_fn = io_fn;
2986 mdev->bm_io_work.done = done;
2987 mdev->bm_io_work.why = why;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002988 mdev->bm_io_work.flags = flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002989
Philipp Reisner87eeee42011-01-19 14:16:30 +01002990 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002991 set_bit(BITMAP_IO, &mdev->flags);
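	/* Editorial note (assumption): if application IO is already drained, the
	 * work is queued right away; otherwise the path that drops ap_bio_cnt to
	 * zero is expected to queue it, since BITMAP_IO is now set. */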
2992 if (atomic_read(&mdev->ap_bio_cnt) == 0) {
Philipp Reisner127b3172010-11-16 10:07:53 +01002993 if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
Philipp Reisnere42325a2011-01-19 13:55:45 +01002994 drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002995 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01002996 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002997}
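/* Illustrative sketch (editorial, not part of the driver): from worker
 * context, a caller could queue a full "set all bits and write out" pass
 * using only helpers defined in this file, e.g.:
 *
 *	drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
 *			     "editorial example", BM_LOCKED_SET_ALLOWED);
 *
 * The done callback may be NULL; the why string must stay valid until the
 * queued work has run. */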
2998
2999/**
3000 * drbd_bitmap_io() - Does an IO operation on the whole bitmap
3001 * @mdev: DRBD device.
3002 * @io_fn: IO callback to be called when bitmap IO is possible
3003 * @why: Descriptive text of the reason for doing the IO
 * @flags: Bitmap locking flags (enum bm_flag), passed on to drbd_bm_lock()
3004 *
3005 * Freezes application IO while the actual IO operation runs. This
3006 * function MAY NOT be called from worker context.
3007 */
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003008int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
3009 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003010{
3011 int rv;
3012
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003013 D_ASSERT(current != mdev->tconn->worker.task);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003014
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003015 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
3016 drbd_suspend_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003017
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003018 drbd_bm_lock(mdev, why, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003019 rv = io_fn(mdev);
3020 drbd_bm_unlock(mdev);
3021
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003022 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
3023 drbd_resume_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003024
3025 return rv;
3026}
3027
3028void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
3029{
3030 if ((mdev->ldev->md.flags & flag) != flag) {
3031 drbd_md_mark_dirty(mdev);
3032 mdev->ldev->md.flags |= flag;
3033 }
3034}
3035
3036void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
3037{
3038 if ((mdev->ldev->md.flags & flag) != 0) {
3039 drbd_md_mark_dirty(mdev);
3040 mdev->ldev->md.flags &= ~flag;
3041 }
3042}
3043int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
3044{
3045 return (bdev->md.flags & flag) != 0;
3046}
3047
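/* Timer callback: runs in timer context, so it must not write the meta data
 * itself; it only (re)queues md_sync_work, and the worker thread then calls
 * drbd_md_sync() from w_md_sync(). */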
3048static void md_sync_timer_fn(unsigned long data)
3049{
3050 struct drbd_conf *mdev = (struct drbd_conf *) data;
3051
Philipp Reisnere42325a2011-01-19 13:55:45 +01003052 drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003053}
3054
Philipp Reisner00d56942011-02-09 18:09:48 +01003055static int w_md_sync(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003056{
Philipp Reisner00d56942011-02-09 18:09:48 +01003057 struct drbd_conf *mdev = w->mdev;
3058
Philipp Reisnerb411b362009-09-25 16:07:19 -07003059 dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
Lars Ellenbergee15b032010-09-03 10:00:09 +02003060#ifdef DEBUG
3061 dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
3062 mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
3063#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07003064 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003065 return 1;
3066}
3067
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003068const char *cmdname(enum drbd_packet cmd)
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003069{
3070 /* THINK may need to become several global tables
3071 * when we want to support more than
3072 * one PRO_VERSION */
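	/* Editorial note (assumption): the handshake packet types are handled
	 * separately after this table because their numeric values lie far above
	 * the dense range the table covers. */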
3073 static const char *cmdnames[] = {
3074 [P_DATA] = "Data",
3075 [P_DATA_REPLY] = "DataReply",
3076 [P_RS_DATA_REPLY] = "RSDataReply",
3077 [P_BARRIER] = "Barrier",
3078 [P_BITMAP] = "ReportBitMap",
3079 [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget",
3080 [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource",
3081 [P_UNPLUG_REMOTE] = "UnplugRemote",
3082 [P_DATA_REQUEST] = "DataRequest",
3083 [P_RS_DATA_REQUEST] = "RSDataRequest",
3084 [P_SYNC_PARAM] = "SyncParam",
3085 [P_SYNC_PARAM89] = "SyncParam89",
3086 [P_PROTOCOL] = "ReportProtocol",
3087 [P_UUIDS] = "ReportUUIDs",
3088 [P_SIZES] = "ReportSizes",
3089 [P_STATE] = "ReportState",
3090 [P_SYNC_UUID] = "ReportSyncUUID",
3091 [P_AUTH_CHALLENGE] = "AuthChallenge",
3092 [P_AUTH_RESPONSE] = "AuthResponse",
3093 [P_PING] = "Ping",
3094 [P_PING_ACK] = "PingAck",
3095 [P_RECV_ACK] = "RecvAck",
3096 [P_WRITE_ACK] = "WriteAck",
3097 [P_RS_WRITE_ACK] = "RSWriteAck",
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003098 [P_DISCARD_WRITE] = "DiscardWrite",
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003099 [P_NEG_ACK] = "NegAck",
3100 [P_NEG_DREPLY] = "NegDReply",
3101 [P_NEG_RS_DREPLY] = "NegRSDReply",
3102 [P_BARRIER_ACK] = "BarrierAck",
3103 [P_STATE_CHG_REQ] = "StateChgRequest",
3104 [P_STATE_CHG_REPLY] = "StateChgReply",
3105 [P_OV_REQUEST] = "OVRequest",
3106 [P_OV_REPLY] = "OVReply",
3107 [P_OV_RESULT] = "OVResult",
3108 [P_CSUM_RS_REQUEST] = "CsumRSRequest",
3109 [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
3110 [P_COMPRESSED_BITMAP] = "CBitmap",
3111 [P_DELAY_PROBE] = "DelayProbe",
3112 [P_OUT_OF_SYNC] = "OutOfSync",
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003113 [P_RETRY_WRITE] = "RetryWrite",
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003114 };
3115
3116 if (cmd == P_HAND_SHAKE_M)
3117 return "HandShakeM";
3118 if (cmd == P_HAND_SHAKE_S)
3119 return "HandShakeS";
3120 if (cmd == P_HAND_SHAKE)
3121 return "HandShake";
Andreas Gruenbacher6e849ce2011-03-14 17:27:45 +01003122 if (cmd >= ARRAY_SIZE(cmdnames))
Andreas Gruenbacherf2ad9062011-01-26 17:13:25 +01003123 return "Unknown";
3124 return cmdnames[cmd];
3125}
3126
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01003127/**
3128 * drbd_wait_misc - wait for a request to make progress
3129 * @mdev: device associated with the request
3130 * @i: the struct drbd_interval embedded in struct drbd_request or
3131 * struct drbd_peer_request
3132 */
3133int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i)
3134{
3135 struct net_conf *net_conf = mdev->tconn->net_conf;
3136 DEFINE_WAIT(wait);
3137 long timeout;
3138
3139 if (!net_conf)
3140 return -ETIMEDOUT;
3141 timeout = MAX_SCHEDULE_TIMEOUT;
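	/* Editorial note (assumption): net_conf->timeout is configured in tenths
	 * of a second, hence the "* HZ / 10" scaling below. */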
3142 if (net_conf->ko_count)
3143 timeout = net_conf->timeout * HZ / 10 * net_conf->ko_count;
3144
3145 /* Indicate to wake up mdev->misc_wait on progress. */
3146 i->waiting = true;
3147 prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE);
3148 spin_unlock_irq(&mdev->tconn->req_lock);
3149 timeout = schedule_timeout(timeout);
3150 finish_wait(&mdev->misc_wait, &wait);
3151 spin_lock_irq(&mdev->tconn->req_lock);
3152 if (!timeout || mdev->state.conn < C_CONNECTED)
3153 return -ETIMEDOUT;
3154 if (signal_pending(current))
3155 return -ERESTARTSYS;
3156 return 0;
3157}
3158
Philipp Reisnerb411b362009-09-25 16:07:19 -07003159#ifdef CONFIG_DRBD_FAULT_INJECTION
3160/* Fault insertion support including random number generator shamelessly
3161 * stolen from kernel/rcutorture.c */
3162struct fault_random_state {
3163 unsigned long state;
3164 unsigned long count;
3165};
3166
3167#define FAULT_RANDOM_MULT 39916801 /* prime */
3168#define FAULT_RANDOM_ADD 479001701 /* prime */
3169#define FAULT_RANDOM_REFRESH 10000
3170
3171/*
3172 * Crude but fast random-number generator. Uses a linear congruential
3173 * generator, with occasional help from get_random_bytes().
3174 */
3175static unsigned long
3176_drbd_fault_random(struct fault_random_state *rsp)
3177{
3178 long refresh;
3179
Roel Kluin49829ea2009-12-15 22:55:44 +01003180 if (!rsp->count--) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003181 get_random_bytes(&refresh, sizeof(refresh));
3182 rsp->state += refresh;
3183 rsp->count = FAULT_RANDOM_REFRESH;
3184 }
3185 rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
3186 return swahw32(rsp->state);
3187}
3188
3189static char *
3190_drbd_fault_str(unsigned int type) {
3191 static char *_faults[] = {
3192 [DRBD_FAULT_MD_WR] = "Meta-data write",
3193 [DRBD_FAULT_MD_RD] = "Meta-data read",
3194 [DRBD_FAULT_RS_WR] = "Resync write",
3195 [DRBD_FAULT_RS_RD] = "Resync read",
3196 [DRBD_FAULT_DT_WR] = "Data write",
3197 [DRBD_FAULT_DT_RD] = "Data read",
3198 [DRBD_FAULT_DT_RA] = "Data read ahead",
3199 [DRBD_FAULT_BM_ALLOC] = "BM allocation",
Philipp Reisner6b4388a2010-04-26 14:11:45 +02003200 [DRBD_FAULT_AL_EE] = "EE allocation",
3201 [DRBD_FAULT_RECEIVE] = "receive data corruption",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003202 };
3203
3204 return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
3205}
3206
3207unsigned int
3208_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
3209{
3210 static struct fault_random_state rrs = {0, 0};
3211
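	/* Inject a fault only if this minor is selected by the fault_devs bitmask
	 * (0 means all devices), with a probability of fault_rate percent. */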
3212 unsigned int ret = (
3213 (fault_devs == 0 ||
3214 ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
3215 (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
3216
3217 if (ret) {
3218 fault_count++;
3219
Lars Ellenberg73835062010-05-27 11:51:56 +02003220 if (__ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003221 dev_warn(DEV, "***Simulating %s failure\n",
3222 _drbd_fault_str(type));
3223 }
3224
3225 return ret;
3226}
3227#endif
3228
3229const char *drbd_buildtag(void)
3230{
3231	/* When DRBD is built from external sources, this holds a reference to
3232	   the git hash of the source code. */
3233
3234 static char buildtag[38] = "\0uilt-in";
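	/* Note (editorial): the leading '\0' makes this an empty string at first;
	 * if no srcversion is available, only buildtag[0] is patched to 'b' below,
	 * turning the buffer into the literal string "built-in". */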
3235
3236 if (buildtag[0] == 0) {
3237#ifdef CONFIG_MODULES
3238 if (THIS_MODULE != NULL)
3239 sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
3240 else
3241#endif
3242 buildtag[0] = 'b';
3243 }
3244
3245 return buildtag;
3246}
3247
3248module_init(drbd_init)
3249module_exit(drbd_cleanup)
3250
Philipp Reisnerb411b362009-09-25 16:07:19 -07003251EXPORT_SYMBOL(drbd_conn_str);
3252EXPORT_SYMBOL(drbd_role_str);
3253EXPORT_SYMBOL(drbd_disk_str);
3254EXPORT_SYMBOL(drbd_set_st_err_str);