/*
   drbd.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */

#include "drbd_vli.h"

static DEFINE_MUTEX(drbd_main_mutex);
int drbdd_init(struct drbd_thread *);
int drbd_worker(struct drbd_thread *);
int drbd_asender(struct drbd_thread *);

int drbd_init(void);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static int drbd_release(struct gendisk *gd, fmode_t mode);
static int w_md_sync(struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_work *w, int unused);

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* allow_open_on_secondary */
MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
 * this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(proc_details, int, 0644);
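/* Illustration only (added as an editorial example, not from the original
 * source): when drbd is built into the kernel, the parameters above are set
 * on the kernel command line with the "drbd." prefix mentioned in the
 * comment, e.g.
 *
 *	drbd.minor_count=16
 *
 * while the module build uses the usual modprobe syntax, e.g.
 *
 *	modprobe drbd minor_count=16 allow_oos=0
 */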

#ifdef CONFIG_DRBD_FAULT_INJECTION
int enable_faults;
int fault_rate;
static int fault_count;
int fault_devs;
/* bitmap of enabled faults */
module_param(enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param(fault_rate, int, 0664);
/* count of faults inserted */
module_param(fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param(fault_devs, int, 0644);
#endif

/* module parameter, defined */
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
int disable_sendpage;
int allow_oos;
int proc_details;       /* Detail level in proc drbd */

/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char usermode_helper[80] = "/sbin/drbdadm";

module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
struct idr minors;
struct list_head drbd_tconns;  /* list of struct drbd_tconn */
DEFINE_MUTEX(drbd_cfg_mutex);

struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache;	/* peer requests */
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;

/* I do not use a standard mempool, because:
   1) I want to hand out the pre-allocated objects first.
   2) I want to be able to interrupt sleeping allocation with a signal.
   Note: This is a single linked list, the next pointer is the private
	 member of struct page.
 */
struct page *drbd_pp_pool;
spinlock_t   drbd_pp_lock;
int          drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;

DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);

static const struct block_device_operations drbd_ops = {
	.owner   = THIS_MODULE,
	.open    = drbd_open,
	.release = drbd_release,
};

static void bio_destructor_drbd(struct bio *bio)
{
	bio_free(bio, drbd_md_io_bio_set);
}

struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
	struct bio *bio;

	if (!drbd_md_io_bio_set)
		return bio_alloc(gfp_mask, 1);

	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
	if (!bio)
		return NULL;
	bio->bi_destructor = bio_destructor_drbd;
	return bio;
}

#ifdef __CHECKER__
/* When checking with sparse, and this is an inline function, sparse will
   give tons of false positives. When this is a real function, sparse works.
 */
int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
{
	int io_allowed;

	atomic_inc(&mdev->local_cnt);
	io_allowed = (mdev->state.disk >= mins);
	if (!io_allowed) {
		if (atomic_dec_and_test(&mdev->local_cnt))
			wake_up(&mdev->misc_wait);
	}
	return io_allowed;
}

#endif

/**
 * DOC: The transfer log
 *
 * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
 * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail
 * of the list. There is always at least one &struct drbd_tl_epoch object.
 *
 * Each &struct drbd_tl_epoch has a circular double linked list of requests
 * attached.
 */
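/* Illustration only (added as an editorial sketch, not from the original
 * source): with three epochs in flight, the structure described above looks
 * roughly like
 *
 *	tconn->oldest_tle -> epoch A --next--> epoch B --next--> epoch C
 *	                                                         ^  (next == NULL)
 *	                                      tconn->newest_tle -+
 *
 * where each epoch keeps its own circular list of struct drbd_request
 * objects on ->requests.
 */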
static int tl_init(struct drbd_tconn *tconn)
{
	struct drbd_tl_epoch *b;

	/* during device minor initialization, we may well use GFP_KERNEL */
	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
	if (!b)
		return 0;
	INIT_LIST_HEAD(&b->requests);
	INIT_LIST_HEAD(&b->w.list);
	b->next = NULL;
	b->br_number = 4711;
	b->n_writes = 0;
	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */

	tconn->oldest_tle = b;
	tconn->newest_tle = b;
	INIT_LIST_HEAD(&tconn->out_of_sequence_requests);

	return 1;
}

static void tl_cleanup(struct drbd_tconn *tconn)
{
	if (tconn->oldest_tle != tconn->newest_tle)
		conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n");
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n");
	kfree(tconn->oldest_tle);
	tconn->oldest_tle = NULL;
	kfree(tconn->unused_spare_tle);
	tconn->unused_spare_tle = NULL;
}

/**
 * _tl_add_barrier() - Adds a barrier to the transfer log
 * @mdev:	DRBD device.
 * @new:	Barrier to be added before the current head of the TL.
 *
 * The caller must hold the req_lock.
 */
void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new)
{
	struct drbd_tl_epoch *newest_before;

	INIT_LIST_HEAD(&new->requests);
	INIT_LIST_HEAD(&new->w.list);
	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
	new->next = NULL;
	new->n_writes = 0;

	newest_before = tconn->newest_tle;
	/* never send a barrier number == 0, because that is special-cased
	 * when using TCQ for our write ordering code */
	new->br_number = (newest_before->br_number+1) ?: 1;
	if (tconn->newest_tle != new) {
		tconn->newest_tle->next = new;
		tconn->newest_tle = new;
	}
}

/**
 * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
 * @mdev:	DRBD device.
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
 * &struct drbd_tl_epoch objects this function will cause a termination
 * of the connection.
 */
void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
		unsigned int set_size)
{
	struct drbd_conf *mdev;
	struct drbd_tl_epoch *b, *nob; /* next old barrier */
	struct list_head *le, *tle;
	struct drbd_request *r;

	spin_lock_irq(&tconn->req_lock);

	b = tconn->oldest_tle;

	/* first some paranoia code */
	if (b == NULL) {
		conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
			 barrier_nr);
		goto bail;
	}
	if (b->br_number != barrier_nr) {
		conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
			 barrier_nr, b->br_number);
		goto bail;
	}
	if (b->n_writes != set_size) {
		conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
			 barrier_nr, set_size, b->n_writes);
		goto bail;
	}

	/* Clean up list of requests processed during current epoch */
	list_for_each_safe(le, tle, &b->requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		_req_mod(r, BARRIER_ACKED);
	}
	/* There could be requests on the list waiting for completion
	   of the write to the local disk. To avoid corruptions of
	   slab's data structures we have to remove the list's head.

	   Also there could have been a barrier ack out of sequence, overtaking
	   the write acks - which would be a bug and violating write ordering.
	   To not deadlock in case we lose connection while such requests are
	   still pending, we need some way to find them for the
	   _req_mod(CONNECTION_LOST_WHILE_PENDING).

	   These have been list_move'd to the out_of_sequence_requests list in
	   _req_mod(, BARRIER_ACKED) above.
	   */
	list_del_init(&b->requests);
	mdev = b->w.mdev;

	nob = b->next;
	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		_tl_add_barrier(tconn, b);
		if (nob)
			tconn->oldest_tle = nob;
		/* if nob == NULL b was the only barrier, and becomes the new
		   barrier. Therefore tconn->oldest_tle points already to b */
	} else {
		D_ASSERT(nob != NULL);
		tconn->oldest_tle = nob;
		kfree(b);
	}

	spin_unlock_irq(&tconn->req_lock);
	dec_ap_pending(mdev);

	return;

bail:
	spin_unlock_irq(&tconn->req_lock);
	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}


/**
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @mdev:	DRBD device.
 * @what:	The action/event to perform with all request objects
 *
 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
 * RESTART_FROZEN_DISK_IO.
 */
void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	struct drbd_tl_epoch *b, *tmp, **pn;
	struct list_head *le, *tle, carry_reads;
	struct drbd_request *req;
	int rv, n_writes, n_reads;

	b = tconn->oldest_tle;
	pn = &tconn->oldest_tle;
	while (b) {
		n_writes = 0;
		n_reads = 0;
		INIT_LIST_HEAD(&carry_reads);
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			rv = _req_mod(req, what);

			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
		}
		tmp = b->next;

		if (n_writes) {
			if (what == RESEND) {
				b->n_writes = n_writes;
				if (b->w.cb == NULL) {
					b->w.cb = w_send_barrier;
					inc_ap_pending(b->w.mdev);
					set_bit(CREATE_BARRIER, &b->w.mdev->flags);
				}

				drbd_queue_work(&tconn->data.work, &b->w);
			}
			pn = &b->next;
		} else {
			if (n_reads)
				list_add(&carry_reads, &b->requests);
			/* there could still be requests on that ring list,
			 * in case local io is still pending */
			list_del(&b->requests);

			/* dec_ap_pending corresponding to queue_barrier.
			 * the newest barrier may not have been queued yet,
			 * in which case w.cb is still NULL. */
			if (b->w.cb != NULL)
				dec_ap_pending(b->w.mdev);

			if (b == tconn->newest_tle) {
				/* recycle, but reinit! */
				if (tmp != NULL)
					conn_err(tconn, "ASSERT FAILED tmp == NULL");
				INIT_LIST_HEAD(&b->requests);
				list_splice(&carry_reads, &b->requests);
				INIT_LIST_HEAD(&b->w.list);
				b->w.cb = NULL;
				b->br_number = net_random();
				b->n_writes = 0;

				*pn = b;
				break;
			}
			*pn = tmp;
			kfree(b);
		}
		b = tmp;
		list_splice(&carry_reads, &b->requests);
	}
}


/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
 * @mdev:	DRBD device.
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer log gets marked as out of sync. Called from the
 * receiver thread and the worker thread.
 */
void tl_clear(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	struct list_head *le, *tle;
	struct drbd_request *r;
	int vnr;

	spin_lock_irq(&tconn->req_lock);

	_tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);

	/* we expect this list to be empty. */
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n");

	/* but just in case, clean it up anyways! */
	list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		/* It would be nice to complete outside of spinlock.
		 * But this is easier for now. */
		_req_mod(r, CONNECTION_LOST_WHILE_PENDING);
	}

	/* ensure bit indicating barrier is required is clear */
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		clear_bit(CREATE_BARRIER, &mdev->flags);

	spin_unlock_irq(&tconn->req_lock);
}

void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	spin_lock_irq(&tconn->req_lock);
	_tl_restart(tconn, what);
	spin_unlock_irq(&tconn->req_lock);
}

static int drbd_thread_setup(void *arg)
{
	struct drbd_thread *thi = (struct drbd_thread *) arg;
	struct drbd_tconn *tconn = thi->tconn;
	unsigned long flags;
	int retval;

	snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
		 thi->name[0], thi->tconn->name);

restart:
	retval = thi->function(thi);

	spin_lock_irqsave(&thi->t_lock, flags);

	/* if the receiver has been "EXITING", the last thing it did
	 * was set the conn state to "StandAlone",
	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
	 * and receiver thread will be "started".
	 * drbd_thread_start needs to set "RESTARTING" in that case.
	 * t_state check and assignment needs to be within the same spinlock,
	 * so either thread_start sees EXITING, and can remap to RESTARTING,
	 * or thread_start sees NONE, and can proceed as normal.
	 */

	if (thi->t_state == RESTARTING) {
		conn_info(tconn, "Restarting %s thread\n", thi->name);
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		goto restart;
	}

	thi->task = NULL;
	thi->t_state = NONE;
	smp_mb();
	complete(&thi->stop);
	spin_unlock_irqrestore(&thi->t_lock, flags);

	conn_info(tconn, "Terminating %s\n", current->comm);

	/* Release mod reference taken when thread was started */
	module_put(THIS_MODULE);
	return retval;
}

static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi,
			     int (*func) (struct drbd_thread *), char *name)
{
	spin_lock_init(&thi->t_lock);
	thi->task    = NULL;
	thi->t_state = NONE;
	thi->function = func;
	thi->tconn = tconn;
	strncpy(thi->name, name, ARRAY_SIZE(thi->name));
}

int drbd_thread_start(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	struct task_struct *nt;
	unsigned long flags;

	/* is used from state engine doing drbd_thread_stop_nowait,
	 * while holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	switch (thi->t_state) {
	case NONE:
		conn_info(tconn, "Starting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);

		/* Get ref on module for thread - this is released when thread exits */
		if (!try_module_get(THIS_MODULE)) {
			conn_err(tconn, "Failed to get module reference in drbd_thread_start\n");
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return false;
		}

		init_completion(&thi->stop);
		thi->reset_cpu_mask = 1;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */

		nt = kthread_create(drbd_thread_setup, (void *) thi,
				    "drbd_%c_%s", thi->name[0], thi->tconn->name);

		if (IS_ERR(nt)) {
			conn_err(tconn, "Couldn't start thread\n");

			module_put(THIS_MODULE);
			return false;
		}
		spin_lock_irqsave(&thi->t_lock, flags);
		thi->task = nt;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		wake_up_process(nt);
		break;
	case EXITING:
		thi->t_state = RESTARTING;
		conn_info(tconn, "Restarting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);
		/* fall through */
	case RUNNING:
	case RESTARTING:
	default:
		spin_unlock_irqrestore(&thi->t_lock, flags);
		break;
	}

	return true;
}


void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
{
	unsigned long flags;

	enum drbd_thread_state ns = restart ? RESTARTING : EXITING;

	/* may be called from state engine, holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	if (thi->t_state == NONE) {
		spin_unlock_irqrestore(&thi->t_lock, flags);
		if (restart)
			drbd_thread_start(thi);
		return;
	}

	if (thi->t_state != ns) {
		if (thi->task == NULL) {
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return;
		}

		thi->t_state = ns;
		smp_mb();
		init_completion(&thi->stop);
		if (thi->task != current)
			force_sig(DRBD_SIGKILL, thi->task);
	}

	spin_unlock_irqrestore(&thi->t_lock, flags);

	if (wait)
		wait_for_completion(&thi->stop);
}

static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi =
		task == tconn->receiver.task ? &tconn->receiver :
		task == tconn->asender.task  ? &tconn->asender :
		task == tconn->worker.task   ? &tconn->worker : NULL;

	return thi;
}

char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi = drbd_task_to_thread(tconn, task);
	return thi ? thi->name : task->comm;
}

int conn_lowest_minor(struct drbd_tconn *tconn)
{
	int vnr = 0;
	struct drbd_conf *mdev;

	mdev = idr_get_next(&tconn->volumes, &vnr);
	if (!mdev)
		return -1;
	return mdev_to_minor(mdev);
}

#ifdef CONFIG_SMP
/**
 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
 * @mdev:	DRBD device.
 *
 * Forces all threads of a device onto the same CPU. This is beneficial for
 * DRBD's performance. May be overwritten by user's configuration.
 */
void drbd_calc_cpu_mask(struct drbd_tconn *tconn)
{
	int ord, cpu;

	/* user override. */
	if (cpumask_weight(tconn->cpu_mask))
		return;

	ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask);
	for_each_online_cpu(cpu) {
		if (ord-- == 0) {
			cpumask_set_cpu(cpu, tconn->cpu_mask);
			return;
		}
	}
	/* should not be reached */
	cpumask_setall(tconn->cpu_mask);
}
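/* Illustration only (added as an editorial example, not from the original
 * source): on a machine with four online CPUs, the connection whose lowest
 * minor is 0 gets pinned to CPU 0, lowest minor 1 to CPU 1, and so on;
 * lowest minor 4 wraps back to CPU 0, because
 * ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask).
 */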

/**
 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
 * @mdev:	DRBD device.
 * @thi:	drbd_thread object
 *
 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
 * prematurely.
 */
void drbd_thread_current_set_cpu(struct drbd_thread *thi)
{
	struct task_struct *p = current;

	if (!thi->reset_cpu_mask)
		return;
	thi->reset_cpu_mask = 0;
	set_cpus_allowed_ptr(p, thi->tconn->cpu_mask);
}
#endif

static void prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be32(DRBD_MAGIC);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be16(size);
}

static void prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be32(size);
}

static void _prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header *h,
			    enum drbd_packet cmd, int size)
{
	if (tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET)
		prepare_header95(&h->h95, cmd, size);
	else
		prepare_header80(&h->h80, cmd, size);
}

static void prepare_header(struct drbd_conf *mdev, struct p_header *h,
			   enum drbd_packet cmd, int size)
{
	_prepare_header(mdev->tconn, mdev->vnr, h, cmd, size);
}

/* the appropriate socket mutex must be held already */
int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock,
		   enum drbd_packet cmd, struct p_header *h, size_t size,
		   unsigned msg_flags)
{
	int err;

	_prepare_header(tconn, vnr, h, cmd, size - sizeof(struct p_header));
	err = drbd_send_all(tconn, sock, h, size, msg_flags);
	if (err && !signal_pending(current))
		conn_warn(tconn, "short send %s size=%d\n",
			  cmdname(cmd), (int)size);
	return err;
}

/* don't pass the socket. we may only look at it
 * when we hold the appropriate socket mutex.
 */
int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket,
		  enum drbd_packet cmd, struct p_header *h, size_t size)
{
	struct socket *sock;
	int err = -EIO;

	if (use_data_socket) {
		mutex_lock(&tconn->data.mutex);
		sock = tconn->data.socket;
	} else {
		mutex_lock(&tconn->meta.mutex);
		sock = tconn->meta.socket;
	}

	/* drbd_disconnect() could have called drbd_free_sock()
	 * while we were waiting in down()... */
	if (likely(sock != NULL))
		err = _conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0);

	if (use_data_socket)
		mutex_unlock(&tconn->data.mutex);
	else
		mutex_unlock(&tconn->meta.mutex);
	return err;
}

int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data,
		   size_t size)
{
	struct p_header80 h;
	int err;

	prepare_header80(&h, cmd, size);
	err = drbd_get_data_sock(tconn);
	if (!err) {
		err = drbd_send_all(tconn, tconn->data.socket, &h, sizeof(h), 0);
		if (!err)
			err = drbd_send_all(tconn, tconn->data.socket, data, size, 0);
		drbd_put_data_sock(tconn);
	}
	return err;
}

int drbd_send_sync_param(struct drbd_conf *mdev)
{
	struct p_rs_param_95 *p;
	struct socket *sock;
	int size, rv;
	const int apv = mdev->tconn->agreed_pro_version;

	size = apv <= 87 ? sizeof(struct p_rs_param)
		: apv == 88 ? sizeof(struct p_rs_param)
			+ strlen(mdev->tconn->net_conf->verify_alg) + 1
		: apv <= 94 ? sizeof(struct p_rs_param_89)
		: /* apv >= 95 */ sizeof(struct p_rs_param_95);

	/* used from admin command context and receiver/worker context.
	 * to avoid kmalloc, grab the socket right here,
	 * then use the pre-allocated sbuf there */
	mutex_lock(&mdev->tconn->data.mutex);
	sock = mdev->tconn->data.socket;

	if (likely(sock != NULL)) {
		enum drbd_packet cmd =
			apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;

		p = &mdev->tconn->data.sbuf.rs_param_95;

		/* initialize verify_alg and csums_alg */
		memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

		if (get_ldev(mdev)) {
			p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate);
			p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead);
			p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target);
			p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target);
			p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate);
			put_ldev(mdev);
		} else {
			p->rate = cpu_to_be32(DRBD_RATE_DEF);
			p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
			p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
			p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
			p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
		}

		if (apv >= 88)
			strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg);
		if (apv >= 89)
			strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg);

		rv = !_drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
	} else
		rv = 0; /* not ok */

	mutex_unlock(&mdev->tconn->data.mutex);

	return rv;
}

int drbd_send_protocol(struct drbd_tconn *tconn)
{
	struct p_protocol *p;
	int size, cf, rv;

	size = sizeof(struct p_protocol);

	if (tconn->agreed_pro_version >= 87)
		size += strlen(tconn->net_conf->integrity_alg) + 1;

	/* we must not recurse into our own queue,
	 * as that is blocked during handshake */
	p = kmalloc(size, GFP_NOIO);
	if (p == NULL)
		return 0;

	p->protocol      = cpu_to_be32(tconn->net_conf->wire_protocol);
	p->after_sb_0p   = cpu_to_be32(tconn->net_conf->after_sb_0p);
	p->after_sb_1p   = cpu_to_be32(tconn->net_conf->after_sb_1p);
	p->after_sb_2p   = cpu_to_be32(tconn->net_conf->after_sb_2p);
	p->two_primaries = cpu_to_be32(tconn->net_conf->two_primaries);

	cf = 0;
	if (tconn->net_conf->want_lose)
		cf |= CF_WANT_LOSE;
	if (tconn->net_conf->dry_run) {
		if (tconn->agreed_pro_version >= 92)
			cf |= CF_DRY_RUN;
		else {
			conn_err(tconn, "--dry-run is not supported by peer");
			kfree(p);
			return -1;
		}
	}
	p->conn_flags    = cpu_to_be32(cf);

	if (tconn->agreed_pro_version >= 87)
		strcpy(p->integrity_alg, tconn->net_conf->integrity_alg);

	rv = !conn_send_cmd2(tconn, P_PROTOCOL, p->head.payload, size - sizeof(struct p_header));
	kfree(p);
	return rv;
}

int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
{
	struct p_uuids p;
	int i;

	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
		return 1;

	for (i = UI_CURRENT; i < UI_SIZE; i++)
		p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;

	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
	p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
	uuid_flags |= mdev->tconn->net_conf->want_lose ? 1 : 0;
	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
	p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);

	put_ldev(mdev);

	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, &p.head, sizeof(p));
}

int drbd_send_uuids(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 0);
}

int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 8);
}

void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
{
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		u64 *uuid = mdev->ldev->md.uuid;
		dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
			 text,
			 (unsigned long long)uuid[UI_CURRENT],
			 (unsigned long long)uuid[UI_BITMAP],
			 (unsigned long long)uuid[UI_HISTORY_START],
			 (unsigned long long)uuid[UI_HISTORY_END]);
		put_ldev(mdev);
	} else {
		dev_info(DEV, "%s effective data uuid: %016llX\n",
			 text,
			 (unsigned long long)mdev->ed_uuid);
	}
}

int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
{
	struct p_rs_uuid p;
	u64 uuid;

	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);

	uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
	drbd_uuid_set(mdev, UI_BITMAP, uuid);
	drbd_print_uuids(mdev, "updated sync UUID");
	drbd_md_sync(mdev);
	p.uuid = cpu_to_be64(uuid);

	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, &p.head, sizeof(p));
}

int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
{
	struct p_sizes p;
	sector_t d_size, u_size;
	int q_order_type, max_bio_size;
	int ok;

	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		D_ASSERT(mdev->ldev->backing_bdev);
		d_size = drbd_get_max_capacity(mdev->ldev);
		u_size = mdev->ldev->dc.disk_size;
		q_order_type = drbd_queue_order_type(mdev);
		max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
		put_ldev(mdev);
	} else {
		d_size = 0;
		u_size = 0;
		q_order_type = QUEUE_ORDERED_NONE;
		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
	}

	p.d_size = cpu_to_be64(d_size);
	p.u_size = cpu_to_be64(u_size);
	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
	p.max_bio_size = cpu_to_be32(max_bio_size);
	p.queue_order_type = cpu_to_be16(q_order_type);
	p.dds_flags = cpu_to_be16(flags);

	ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, &p.head, sizeof(p));
	return ok;
}

/**
 * drbd_send_state() - Sends the drbd state to the peer
 * @mdev:	DRBD device.
 */
int drbd_send_state(struct drbd_conf *mdev)
{
	struct socket *sock;
	struct p_state p;
	int ok = 0;

	mutex_lock(&mdev->tconn->data.mutex);

	p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
	sock = mdev->tconn->data.socket;

	if (likely(sock != NULL))
		ok = !_drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0);

	mutex_unlock(&mdev->tconn->data.mutex);

	return ok;
}

int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd,
			 union drbd_state mask, union drbd_state val)
{
	struct p_req_state p;

	p.mask    = cpu_to_be32(mask.i);
	p.val     = cpu_to_be32(val.i);

	return !conn_send_cmd(tconn, vnr, USE_DATA_SOCKET, cmd, &p.head, sizeof(p));
}

int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
{
	struct p_req_state_reply p;

	p.retcode    = cpu_to_be32(retcode);

	return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, &p.head, sizeof(p));
}

int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode)
{
	struct p_req_state_reply p;
	enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY;

	p.retcode    = cpu_to_be32(retcode);

	return !conn_send_cmd(tconn, 0, USE_META_SOCKET, cmd, &p.head, sizeof(p));
}

int fill_bitmap_rle_bits(struct drbd_conf *mdev,
			 struct p_compressed_bm *p,
			 struct bm_xfer_ctx *c)
{
	struct bitstream bs;
	unsigned long plain_bits;
	unsigned long tmp;
	unsigned long rl;
	unsigned len;
	unsigned toggle;
	int bits;

	/* may we use this feature? */
	if ((mdev->tconn->net_conf->use_rle == 0) ||
	    (mdev->tconn->agreed_pro_version < 90))
		return 0;

	if (c->bit_offset >= c->bm_bits)
		return 0; /* nothing to do. */

	/* use at most this many bytes */
	bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
	memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
	/* plain bits covered in this code string */
	plain_bits = 0;

	/* p->encoding & 0x80 stores whether the first run length is set.
	 * bit offset is implicit.
	 * start with toggle == 2 to be able to tell the first iteration */
	toggle = 2;

	/* see how many plain bits we can stuff into one packet
	 * using RLE and VLI. */
	do {
		tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
				    : _drbd_bm_find_next(mdev, c->bit_offset);
		if (tmp == -1UL)
			tmp = c->bm_bits;
		rl = tmp - c->bit_offset;

		if (toggle == 2) { /* first iteration */
			if (rl == 0) {
				/* the first checked bit was set,
				 * store start value, */
				DCBP_set_start(p, 1);
				/* but skip encoding of zero run length */
				toggle = !toggle;
				continue;
			}
			DCBP_set_start(p, 0);
		}

		/* paranoia: catch zero runlength.
		 * can only happen if bitmap is modified while we scan it. */
		if (rl == 0) {
			dev_err(DEV, "unexpected zero runlength while encoding bitmap "
			    "t:%u bo:%lu\n", toggle, c->bit_offset);
			return -1;
		}

		bits = vli_encode_bits(&bs, rl);
		if (bits == -ENOBUFS) /* buffer full */
			break;
		if (bits <= 0) {
			dev_err(DEV, "error while encoding bitmap: %d\n", bits);
			return 0;
		}

		toggle = !toggle;
		plain_bits += rl;
		c->bit_offset = tmp;
	} while (c->bit_offset < c->bm_bits);

	len = bs.cur.b - p->code + !!bs.cur.bit;

	if (plain_bits < (len << 3)) {
		/* incompressible with this method.
		 * we need to rewind both word and bit position. */
		c->bit_offset -= plain_bits;
		bm_xfer_ctx_bit_to_word_offset(c);
		c->bit_offset = c->word_offset * BITS_PER_LONG;
		return 0;
	}

	/* RLE + VLI was able to compress it just fine.
	 * update c->word_offset. */
	bm_xfer_ctx_bit_to_word_offset(c);

	/* store pad_bits */
	DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);

	return len;
}
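/* Worked example, for illustration only (added editorially, not from the
 * original source): a bitmap chunk starting 0,0,1,1,1,0,0,0,0,... is encoded
 * as "first run is a run of clear bits" (DCBP_set_start(p, 0)) followed by
 * the VLI-encoded run lengths 2, 3, 4, ...; a chunk whose very first bit is
 * set instead stores start == 1 and skips encoding the zero-length first run.
 */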
1129
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001130/**
1131 * send_bitmap_rle_or_plain
1132 *
1133 * Return 0 when done, 1 when another iteration is needed, and a negative error
1134 * code upon failure.
1135 */
1136static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07001137send_bitmap_rle_or_plain(struct drbd_conf *mdev,
Philipp Reisnerc0129492011-01-19 16:58:16 +01001138 struct p_header *h, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001139{
1140 struct p_compressed_bm *p = (void*)h;
1141 unsigned long num_words;
1142 int len;
1143 int ok;
1144
1145 len = fill_bitmap_rle_bits(mdev, p, c);
1146
1147 if (len < 0)
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001148 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001149
1150 if (len) {
1151 DCBP_set_code(p, RLE_VLI_Bits);
Andreas Gruenbacher04dfa132011-03-15 23:51:21 +01001152 ok = !_drbd_send_cmd(mdev, mdev->tconn->data.socket, P_COMPRESSED_BITMAP, h,
1153 sizeof(*p) + len, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001154
1155 c->packets[0]++;
1156 c->bytes[0] += sizeof(*p) + len;
1157
1158 if (c->bit_offset >= c->bm_bits)
1159 len = 0; /* DONE */
1160 } else {
1161 /* was not compressible.
1162 * send a buffer full of plain text bits instead. */
1163 num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
1164 len = num_words * sizeof(long);
1165 if (len)
1166 drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
Andreas Gruenbacher04dfa132011-03-15 23:51:21 +01001167 ok = !_drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP,
1168 h, sizeof(struct p_header80) + len, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001169 c->word_offset += num_words;
1170 c->bit_offset = c->word_offset * BITS_PER_LONG;
1171
1172 c->packets[1]++;
Philipp Reisner0b70a132010-08-20 13:36:10 +02001173 c->bytes[1] += sizeof(struct p_header80) + len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001174
1175 if (c->bit_offset > c->bm_bits)
1176 c->bit_offset = c->bm_bits;
1177 }
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001178 if (ok) {
1179 if (len == 0) {
1180 INFO_bm_xfer_stats(mdev, "send", c);
1181 return 0;
1182 } else
1183 return 1;
1184 }
1185 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001186}
1187
1188/* See the comment at receive_bitmap() */
1189int _drbd_send_bitmap(struct drbd_conf *mdev)
1190{
1191 struct bm_xfer_ctx c;
Philipp Reisnerc0129492011-01-19 16:58:16 +01001192 struct p_header *p;
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001193 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001194
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001195 if (!expect(mdev->bitmap))
1196 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197
1198 /* maybe we should use some per thread scratch page,
1199 * and allocate that during initial device creation? */
Philipp Reisnerc0129492011-01-19 16:58:16 +01001200 p = (struct p_header *) __get_free_page(GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201 if (!p) {
1202 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001203 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001204 }
1205
1206 if (get_ldev(mdev)) {
1207 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1208 dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
1209 drbd_bm_set_all(mdev);
1210 if (drbd_bm_write(mdev)) {
1211 /* write_bm did fail! Leave full sync flag set in Meta P_DATA
1212 * but otherwise process as per normal - need to tell other
1213 * side that a full resync is required! */
1214 dev_err(DEV, "Failed to write bitmap to disk!\n");
1215 } else {
1216 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
1217 drbd_md_sync(mdev);
1218 }
1219 }
1220 put_ldev(mdev);
1221 }
1222
1223 c = (struct bm_xfer_ctx) {
1224 .bm_bits = drbd_bm_bits(mdev),
1225 .bm_words = drbd_bm_words(mdev),
1226 };
1227
1228 do {
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001229 err = send_bitmap_rle_or_plain(mdev, p, &c);
1230 } while (err > 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001231
1232 free_page((unsigned long) p);
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01001233 return err == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001234}
1235
1236int drbd_send_bitmap(struct drbd_conf *mdev)
1237{
1238 int err;
1239
Andreas Gruenbacher11b0be22011-03-15 16:15:10 +01001240 if (drbd_get_data_sock(mdev->tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001241 return -1;
1242 err = !_drbd_send_bitmap(mdev);
Philipp Reisner61120872011-02-08 09:50:54 +01001243 drbd_put_data_sock(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001244 return err;
1245}
1246
1247int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
1248{
1249 int ok;
1250 struct p_barrier_ack p;
1251
1252 p.barrier = barrier_nr;
1253 p.set_size = cpu_to_be32(set_size);
1254
1255 if (mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001256 return false;
Philipp Reisnerc0129492011-01-19 16:58:16 +01001257 ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, &p.head, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001258 return ok;
1259}
1260
1261/**
1262 * _drbd_send_ack() - Sends an ack packet
1263 * @mdev: DRBD device.
1264 * @cmd: Packet command code.
1265 * @sector: sector, needs to be in big endian byte order
1266 * @blksize:	size in bytes, needs to be in big endian byte order
1267 * @block_id: Id, big endian byte order
1268 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001269static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
1270 u64 sector, u32 blksize, u64 block_id)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271{
1272 int ok;
1273 struct p_block_ack p;
1274
1275 p.sector = sector;
1276 p.block_id = block_id;
1277 p.blksize = blksize;
Andreas Gruenbacher8ccf2182011-02-24 11:35:43 +01001278 p.seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001279
Philipp Reisnere42325a2011-01-19 13:55:45 +01001280 if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001281 return false;
Philipp Reisnerc0129492011-01-19 16:58:16 +01001282 ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, &p.head, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001283 return ok;
1284}
1285
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001286/* dp->sector and dp->block_id already/still in network byte order,
1287 * data_size is payload size according to dp->head,
1288 * and may need to be corrected for digest size. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001289int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd,
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001290 struct p_data *dp, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001291{
Philipp Reisnera0638452011-01-19 14:31:32 +01001292 data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1293 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001294 return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
1295 dp->block_id);
1296}
1297
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001298int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001299 struct p_block_req *rp)
1300{
1301 return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
1302}
1303
1304/**
1305 * drbd_send_ack() - Sends an ack packet
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001306 * @mdev: DRBD device
1307 * @cmd: packet command code
1308 * @peer_req: peer request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001309 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001310int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001311 struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001312{
1313 return _drbd_send_ack(mdev, cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001314 cpu_to_be64(peer_req->i.sector),
1315 cpu_to_be32(peer_req->i.size),
1316 peer_req->block_id);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001317}
1318
1319/* This function misuses the block_id field to signal if the blocks
1320 * are in sync or not. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001321int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001322 sector_t sector, int blksize, u64 block_id)
1323{
1324 return _drbd_send_ack(mdev, cmd,
1325 cpu_to_be64(sector),
1326 cpu_to_be32(blksize),
1327 cpu_to_be64(block_id));
1328}
1329
1330int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
1331 sector_t sector, int size, u64 block_id)
1332{
1333 int ok;
1334 struct p_block_req p;
1335
1336 p.sector = cpu_to_be64(sector);
1337 p.block_id = block_id;
1338 p.blksize = cpu_to_be32(size);
1339
Philipp Reisnerc0129492011-01-19 16:58:16 +01001340 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &p.head, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001341 return ok;
1342}
1343
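/* Like drbd_send_drequest(), but a digest of the referenced data is appended
 * right after the request header on the data socket; both sends happen under
 * data.mutex so they stay back to back on the wire.  The header length
 * announces sizeof(p) - sizeof(struct p_header) + digest_size, so the peer
 * knows how many digest bytes follow.  Presumably used for checksum based
 * resync requests and online-verify replies; cmd is supplied by the caller. */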
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001344int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size,
1345 void *digest, int digest_size, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001346{
1347 int ok;
1348 struct p_block_req p;
1349
Philipp Reisnerfd340c12011-01-19 16:57:39 +01001350 prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001351 p.sector = cpu_to_be64(sector);
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +01001352 p.block_id = ID_SYNCER /* unused */;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001353 p.blksize = cpu_to_be32(size);
1354
Philipp Reisnere42325a2011-01-19 13:55:45 +01001355 mutex_lock(&mdev->tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001356
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001357 ok = (sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), 0));
1358 ok = ok && (digest_size == drbd_send(mdev->tconn, mdev->tconn->data.socket, digest, digest_size, 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001359
Philipp Reisnere42325a2011-01-19 13:55:45 +01001360 mutex_unlock(&mdev->tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001361
1362 return ok;
1363}
1364
1365int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
1366{
1367 int ok;
1368 struct p_block_req p;
1369
1370 p.sector = cpu_to_be64(sector);
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +01001371 p.block_id = ID_SYNCER /* unused */;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001372 p.blksize = cpu_to_be32(size);
1373
Philipp Reisnerc0129492011-01-19 16:58:16 +01001374 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, &p.head, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001375 return ok;
1376}
1377
1378/* called on sndtimeo
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001379 * returns false if we should retry,
1380 * true if we think the connection is dead
Philipp Reisnerb411b362009-09-25 16:07:19 -07001381 */
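/* A send timeout does not immediately kill the connection: as long as the
 * asender is still running and we are at least in C_WF_REPORT_PARAMS, each
 * timeout only decrements tconn->ko_count and asks the asender to ping the
 * peer.  Only when ko_count reaches zero, or when this is the meta socket
 * itself, do we report the connection as dead. */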
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001382static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001383{
1384 int drop_it;
1385 /* long elapsed = (long)(jiffies - mdev->last_received); */
1386
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001387 drop_it = tconn->meta.socket == sock
1388 || !tconn->asender.task
1389 || get_t_state(&tconn->asender) != RUNNING
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001390 || tconn->cstate < C_WF_REPORT_PARAMS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001391
1392 if (drop_it)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001393 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001394
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001395 drop_it = !--tconn->ko_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001396 if (!drop_it) {
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001397 conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
1398 current->comm, current->pid, tconn->ko_count);
1399 request_ping(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001400 }
1401
1402	return drop_it; /* && (mdev->state == R_PRIMARY) */
1403}
1404
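/* Mark the connection as congested once the data socket's send queue has
 * grown beyond roughly 80% of its send buffer; drbd_congested() below uses
 * the NET_CONGESTED bit to report this back to the writeback code. */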
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001405static void drbd_update_congested(struct drbd_tconn *tconn)
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001406{
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001407 struct sock *sk = tconn->data.socket->sk;
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001408 if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001409 set_bit(NET_CONGESTED, &tconn->flags);
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +01001410}
1411
Philipp Reisnerb411b362009-09-25 16:07:19 -07001412/* The idea of sendpage seems to be to put some kind of reference
1413 * to the page into the skb, and to hand it over to the NIC. In
1414 * this process get_page() gets called.
1415 *
1416 * As soon as the page was really sent over the network put_page()
1417 * gets called by some part of the network layer. [ NIC driver? ]
1418 *
1419 * [ get_page() / put_page() increment/decrement the count. If count
1420 * reaches 0 the page will be freed. ]
1421 *
1422 * This works nicely with pages from FSs.
1423 * But this means that in protocol A we might signal IO completion too early!
1424 *
1425 * In order not to corrupt data during a resync we must make sure
1426 * that we do not reuse our own buffer pages (EEs) too early, therefore
1427 * we have the net_ee list.
1428 *
1429 * XFS seems to have problems, still, it submits pages with page_count == 0!
1430 * As a workaround, we disable sendpage on pages
1431 * with page_count == 0 or PageSlab.
1432 */
1433static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001434 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001435{
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001436 int sent = drbd_send(mdev->tconn, mdev->tconn->data.socket, kmap(page) + offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437 kunmap(page);
1438 if (sent == size)
1439 mdev->send_cnt += size>>9;
1440 return sent == size;
1441}
1442
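/* Zero-copy variant: hand the page to the network stack via ->sendpage(),
 * retrying on -EAGAIN as long as we_should_drop_the_connection() says the
 * peer still looks alive.  Slab pages and pages with a zero refcount fall
 * back to the copying _drbd_no_send_page() above (see the big comment). */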
1443static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001444 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001445{
1446 mm_segment_t oldfs = get_fs();
1447 int sent, ok;
1448 int len = size;
1449
1450 /* e.g. XFS meta- & log-data is in slab pages, which have a
1451 * page_count of 0 and/or have PageSlab() set.
1452 * we cannot use send_page for those, as that does get_page();
1453 * put_page(); and would cause either a VM_BUG directly, or
1454 * __page_cache_release a page that would actually still be referenced
1455 * by someone, leading to some obscure delayed Oops somewhere else. */
1456 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001457 return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001458
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001459 msg_flags |= MSG_NOSIGNAL;
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001460 drbd_update_congested(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001461 set_fs(KERNEL_DS);
1462 do {
Philipp Reisnere42325a2011-01-19 13:55:45 +01001463 sent = mdev->tconn->data.socket->ops->sendpage(mdev->tconn->data.socket, page,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464 offset, len,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001465 msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466 if (sent == -EAGAIN) {
Philipp Reisner1a7ba642011-02-07 14:56:02 +01001467 if (we_should_drop_the_connection(mdev->tconn,
Philipp Reisnere42325a2011-01-19 13:55:45 +01001468 mdev->tconn->data.socket))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001469 break;
1470 else
1471 continue;
1472 }
1473 if (sent <= 0) {
1474 dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
1475 __func__, (int)size, len, sent);
1476 break;
1477 }
1478 len -= sent;
1479 offset += sent;
1480 } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
1481 set_fs(oldfs);
Philipp Reisner01a311a2011-02-07 14:30:33 +01001482 clear_bit(NET_CONGESTED, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483
1484 ok = (len == 0);
1485 if (likely(ok))
1486 mdev->send_cnt += size>>9;
1487 return ok;
1488}
1489
1490static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
1491{
1492 struct bio_vec *bvec;
1493 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001494 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495 __bio_for_each_segment(bvec, bio, i, 0) {
1496 if (!_drbd_no_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001497 bvec->bv_offset, bvec->bv_len,
1498 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499 return 0;
1500 }
1501 return 1;
1502}
1503
1504static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
1505{
1506 struct bio_vec *bvec;
1507 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001508 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001509 __bio_for_each_segment(bvec, bio, i, 0) {
1510 if (!_drbd_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001511 bvec->bv_offset, bvec->bv_len,
1512 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513 return 0;
1514 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001515 return 1;
1516}
1517
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001518static int _drbd_send_zc_ee(struct drbd_conf *mdev,
1519 struct drbd_peer_request *peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001520{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001521 struct page *page = peer_req->pages;
1522 unsigned len = peer_req->i.size;
1523
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001524 /* hint all but last page with MSG_MORE */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001525 page_chain_for_each(page) {
1526 unsigned l = min_t(unsigned, len, PAGE_SIZE);
Lars Ellenbergba11ad92010-05-25 16:26:16 +02001527 if (!_drbd_send_page(mdev, page, 0, l,
1528 page_chain_next(page) ? MSG_MORE : 0))
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001529 return 0;
1530 len -= l;
1531 }
1532 return 1;
1533}
1534
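/* Translate the bio's rw flags into the DP_* flags used on the wire.  Peers
 * that agreed on protocol 95 or newer understand FUA/FLUSH/DISCARD; older
 * peers only get the sync hint.  A sketch of the mapping:
 *
 *	REQ_SYNC    -> DP_RW_SYNC
 *	REQ_FUA     -> DP_FUA        (protocol >= 95)
 *	REQ_FLUSH   -> DP_FLUSH      (protocol >= 95)
 *	REQ_DISCARD -> DP_DISCARD    (protocol >= 95)
 */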
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001535static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
1536{
Philipp Reisner31890f42011-01-19 14:12:51 +01001537 if (mdev->tconn->agreed_pro_version >= 95)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001538 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001539 (bi_rw & REQ_FUA ? DP_FUA : 0) |
1540 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
1541 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
1542 else
Jens Axboe721a9602011-03-09 11:56:30 +01001543 return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001544}
1545
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546/* Used to send write requests
1547 * R_PRIMARY -> Peer (P_DATA)
1548 */
1549int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
1550{
1551 int ok = 1;
1552 struct p_data p;
1553 unsigned int dp_flags = 0;
1554 void *dgb;
1555 int dgs;
1556
Andreas Gruenbacher11b0be22011-03-15 16:15:10 +01001557 if (drbd_get_data_sock(mdev->tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001558 return 0;
1559
Philipp Reisnera0638452011-01-19 14:31:32 +01001560 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
1561 crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001562
Philipp Reisnerfd340c12011-01-19 16:57:39 +01001563 prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size);
Andreas Gruenbacherace652a2011-01-03 17:09:58 +01001564 p.sector = cpu_to_be64(req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001565 p.block_id = (unsigned long)req;
Andreas Gruenbacher8ccf2182011-02-24 11:35:43 +01001566 p.seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001567
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001568 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
1569
Philipp Reisnerb411b362009-09-25 16:07:19 -07001570 if (mdev->state.conn >= C_SYNC_SOURCE &&
1571 mdev->state.conn <= C_PAUSED_SYNC_T)
1572 dp_flags |= DP_MAY_SET_IN_SYNC;
1573
1574 p.dp_flags = cpu_to_be32(dp_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001575 set_bit(UNPLUG_REMOTE, &mdev->flags);
1576 ok = (sizeof(p) ==
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001577 drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001578 if (ok && dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001579 dgb = mdev->tconn->int_dig_out;
1580 drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, dgb);
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001581 ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001582 }
1583 if (ok) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001584 /* For protocol A, we have to memcpy the payload into
1585 * socket buffers, as we may complete right away
1586 * as soon as we handed it over to tcp, at which point the data
1587 * pages may become invalid.
1588 *
1589 * For data-integrity enabled, we copy it as well, so we can be
1590 * sure that even if the bio pages may still be modified, it
1591 * won't change the data on the wire, thus if the digest checks
1592 * out ok after sending on this side, but does not fit on the
1593 * receiving side, we sure have detected corruption elsewhere.
1594 */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001595 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || dgs)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596 ok = _drbd_send_bio(mdev, req->master_bio);
1597 else
1598 ok = _drbd_send_zc_bio(mdev, req->master_bio);
Lars Ellenberg470be442010-11-10 10:36:52 +01001599
1600 /* double check digest, sometimes buffers have been modified in flight. */
1601 if (dgs > 0 && dgs <= 64) {
Bart Van Assche24c48302011-05-21 18:32:29 +02001602 /* 64 byte, 512 bit, is the largest digest size
Lars Ellenberg470be442010-11-10 10:36:52 +01001603 * currently supported in kernel crypto. */
1604 unsigned char digest[64];
Philipp Reisnera0638452011-01-19 14:31:32 +01001605 drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, digest);
1606 if (memcmp(mdev->tconn->int_dig_out, digest, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001607 dev_warn(DEV,
1608 "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
Andreas Gruenbacherace652a2011-01-03 17:09:58 +01001609 (unsigned long long)req->i.sector, req->i.size);
Lars Ellenberg470be442010-11-10 10:36:52 +01001610 }
1611 } /* else if (dgs > 64) {
1612 ... Be noisy about digest too large ...
1613 } */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001614 }
1615
Philipp Reisner61120872011-02-08 09:50:54 +01001616 drbd_put_data_sock(mdev->tconn);
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02001617
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618 return ok;
1619}
1620
1621/* answer packet, used to send data back for read requests:
1622 * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
1623 * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
1624 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001625int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001626 struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627{
1628 int ok;
1629 struct p_data p;
1630 void *dgb;
1631 int dgs;
1632
Philipp Reisnera0638452011-01-19 14:31:32 +01001633 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
1634 crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001635
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001636 prepare_header(mdev, &p.head, cmd, sizeof(p) -
1637 sizeof(struct p_header80) +
1638 dgs + peer_req->i.size);
1639 p.sector = cpu_to_be64(peer_req->i.sector);
1640 p.block_id = peer_req->block_id;
Andreas Gruenbachercc378272011-01-26 18:01:50 +01001641 p.seq_num = 0; /* unused */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001642
1643 /* Only called by our kernel thread.
1644 * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
1645 * in response to admin command or module unload.
1646 */
Andreas Gruenbacher11b0be22011-03-15 16:15:10 +01001647 if (drbd_get_data_sock(mdev->tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001648 return 0;
1649
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001650 ok = sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001651 if (ok && dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001652 dgb = mdev->tconn->int_dig_out;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001653 drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, dgb);
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001654 ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001655 }
1656 if (ok)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001657 ok = _drbd_send_zc_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001658
Philipp Reisner61120872011-02-08 09:50:54 +01001659 drbd_put_data_sock(mdev->tconn);
Philipp Reisnerbd26bfc52010-05-04 12:33:58 +02001660
Philipp Reisnerb411b362009-09-25 16:07:19 -07001661 return ok;
1662}
1663
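/* Tell the peer that the range covered by this request is now out of sync:
 * a P_OUT_OF_SYNC packet on the data socket carrying just sector and size,
 * no payload. */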
Philipp Reisner73a01a12010-10-27 14:33:00 +02001664int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
1665{
1666 struct p_block_desc p;
1667
Andreas Gruenbacherace652a2011-01-03 17:09:58 +01001668 p.sector = cpu_to_be64(req->i.sector);
1669 p.blksize = cpu_to_be32(req->i.size);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001670
1671 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
1672}
1673
Philipp Reisnerb411b362009-09-25 16:07:19 -07001674/*
1675 drbd_send distinguishes two cases:
1676
1677 Packets sent via the data socket "sock"
1678 and packets sent via the meta data socket "msock"
1679
1680 sock msock
1681 -----------------+-------------------------+------------------------------
1682 timeout conf.timeout / 2 conf.timeout / 2
1683 timeout action send a ping via msock Abort communication
1684 and close all sockets
1685*/
1686
1687/*
1688 * you must have down()ed the appropriate [m]sock_mutex elsewhere!
1689 */
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001690int drbd_send(struct drbd_tconn *tconn, struct socket *sock,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001691 void *buf, size_t size, unsigned msg_flags)
1692{
1693 struct kvec iov;
1694 struct msghdr msg;
1695 int rv, sent = 0;
1696
1697 if (!sock)
Andreas Gruenbacherc0d42c82010-12-09 23:52:22 +01001698 return -EBADR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001699
1700 /* THINK if (signal_pending) return ... ? */
1701
1702 iov.iov_base = buf;
1703 iov.iov_len = size;
1704
1705 msg.msg_name = NULL;
1706 msg.msg_namelen = 0;
1707 msg.msg_control = NULL;
1708 msg.msg_controllen = 0;
1709 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
1710
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001711 if (sock == tconn->data.socket) {
1712 tconn->ko_count = tconn->net_conf->ko_count;
1713 drbd_update_congested(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001714 }
1715 do {
1716 /* STRANGE
1717 * tcp_sendmsg does _not_ use its size parameter at all ?
1718 *
1719 * -EAGAIN on timeout, -EINTR on signal.
1720 */
1721/* THINK
1722 * do we need to block DRBD_SIG if sock == &meta.socket ??
1723 * otherwise wake_asender() might interrupt some send_*Ack !
1724 */
1725 rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
1726 if (rv == -EAGAIN) {
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001727 if (we_should_drop_the_connection(tconn, sock))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001728 break;
1729 else
1730 continue;
1731 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001732 if (rv == -EINTR) {
1733 flush_signals(current);
1734 rv = 0;
1735 }
1736 if (rv < 0)
1737 break;
1738 sent += rv;
1739 iov.iov_base += rv;
1740 iov.iov_len -= rv;
1741 } while (sent < size);
1742
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001743 if (sock == tconn->data.socket)
1744 clear_bit(NET_CONGESTED, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001745
1746 if (rv <= 0) {
1747 if (rv != -EAGAIN) {
Philipp Reisnerbedbd2a2011-02-07 15:08:48 +01001748 conn_err(tconn, "%s_sendmsg returned %d\n",
1749 sock == tconn->meta.socket ? "msock" : "sock",
1750 rv);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001751 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001752 } else
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001753 conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001754 }
1755
1756 return sent;
1757}
1758
Andreas Gruenbacherfb708e42010-12-15 17:04:36 +01001759/**
1760 * drbd_send_all - Send an entire buffer
1761 *
1762 * Returns 0 upon success and a negative error value otherwise.
1763 */
1764int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer,
1765 size_t size, unsigned msg_flags)
1766{
1767 int err;
1768
1769 err = drbd_send(tconn, sock, buffer, size, msg_flags);
1770 if (err < 0)
1771 return err;
1772 if (err != size)
1773 return -EIO;
1774 return 0;
1775}
1776
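/* Block device open.  Done under req_lock so we see a stable role: write
 * opens are refused with -EROFS unless we are Primary, and read-only opens
 * on a Secondary additionally require the allow_oos module parameter,
 * otherwise they fail with -EMEDIUMTYPE. */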
Philipp Reisnerb411b362009-09-25 16:07:19 -07001777static int drbd_open(struct block_device *bdev, fmode_t mode)
1778{
1779 struct drbd_conf *mdev = bdev->bd_disk->private_data;
1780 unsigned long flags;
1781 int rv = 0;
1782
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001783 mutex_lock(&drbd_main_mutex);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001784 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001785 /* to have a stable mdev->state.role
1786 * and no race with updating open_cnt */
1787
1788 if (mdev->state.role != R_PRIMARY) {
1789 if (mode & FMODE_WRITE)
1790 rv = -EROFS;
1791 else if (!allow_oos)
1792 rv = -EMEDIUMTYPE;
1793 }
1794
1795 if (!rv)
1796 mdev->open_cnt++;
Philipp Reisner87eeee42011-01-19 14:16:30 +01001797 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001798 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001799
1800 return rv;
1801}
1802
1803static int drbd_release(struct gendisk *gd, fmode_t mode)
1804{
1805 struct drbd_conf *mdev = gd->private_data;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001806 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001807 mdev->open_cnt--;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001808 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001809 return 0;
1810}
1811
Philipp Reisnerb411b362009-09-25 16:07:19 -07001812static void drbd_set_defaults(struct drbd_conf *mdev)
1813{
Lars Ellenbergf3990022011-03-23 14:31:09 +01001814 /* Beware! The actual layout differs
1815 * between big endian and little endian */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001816 mdev->state = (union drbd_state) {
1817 { .role = R_SECONDARY,
1818 .peer = R_UNKNOWN,
1819 .conn = C_STANDALONE,
1820 .disk = D_DISKLESS,
1821 .pdsk = D_UNKNOWN,
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001822 .susp = 0,
1823 .susp_nod = 0,
1824 .susp_fen = 0
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825 } };
1826}
1827
1828void drbd_init_set_defaults(struct drbd_conf *mdev)
1829{
1830 /* the memset(,0,) did most of this.
1831 * note: only assignments, no allocation in here */
1832
1833 drbd_set_defaults(mdev);
1834
Philipp Reisnerb411b362009-09-25 16:07:19 -07001835 atomic_set(&mdev->ap_bio_cnt, 0);
1836 atomic_set(&mdev->ap_pending_cnt, 0);
1837 atomic_set(&mdev->rs_pending_cnt, 0);
1838 atomic_set(&mdev->unacked_cnt, 0);
1839 atomic_set(&mdev->local_cnt, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001840 atomic_set(&mdev->pp_in_use, 0);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001841 atomic_set(&mdev->pp_in_use_by_net, 0);
Philipp Reisner778f2712010-07-06 11:14:00 +02001842 atomic_set(&mdev->rs_sect_in, 0);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001843 atomic_set(&mdev->rs_sect_ev, 0);
Philipp Reisner759fbdf2010-10-26 16:02:27 +02001844 atomic_set(&mdev->ap_in_flight, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001845
1846 mutex_init(&mdev->md_io_mutex);
Philipp Reisner8410da8f02011-02-11 20:11:10 +01001847 mutex_init(&mdev->own_state_mutex);
1848 mdev->state_mutex = &mdev->own_state_mutex;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001849
Philipp Reisnerb411b362009-09-25 16:07:19 -07001850 spin_lock_init(&mdev->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001851 spin_lock_init(&mdev->peer_seq_lock);
1852 spin_lock_init(&mdev->epoch_lock);
1853
1854 INIT_LIST_HEAD(&mdev->active_ee);
1855 INIT_LIST_HEAD(&mdev->sync_ee);
1856 INIT_LIST_HEAD(&mdev->done_ee);
1857 INIT_LIST_HEAD(&mdev->read_ee);
1858 INIT_LIST_HEAD(&mdev->net_ee);
1859 INIT_LIST_HEAD(&mdev->resync_reads);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001860 INIT_LIST_HEAD(&mdev->resync_work.list);
1861 INIT_LIST_HEAD(&mdev->unplug_work.list);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001862 INIT_LIST_HEAD(&mdev->go_diskless.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001863 INIT_LIST_HEAD(&mdev->md_sync_work.list);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02001864 INIT_LIST_HEAD(&mdev->start_resync_work.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001865 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
Philipp Reisner0ced55a2010-04-30 15:26:20 +02001866
Philipp Reisner794abb72010-12-27 11:51:23 +01001867 mdev->resync_work.cb = w_resync_timer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001868 mdev->unplug_work.cb = w_send_write_hint;
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001869 mdev->go_diskless.cb = w_go_diskless;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001870 mdev->md_sync_work.cb = w_md_sync;
1871 mdev->bm_io_work.w.cb = w_bitmap_io;
Philipp Reisner370a43e2011-01-14 16:03:11 +01001872 mdev->start_resync_work.cb = w_start_resync;
Philipp Reisnera21e9292011-02-08 15:08:49 +01001873
1874 mdev->resync_work.mdev = mdev;
1875 mdev->unplug_work.mdev = mdev;
1876 mdev->go_diskless.mdev = mdev;
1877 mdev->md_sync_work.mdev = mdev;
1878 mdev->bm_io_work.w.mdev = mdev;
1879 mdev->start_resync_work.mdev = mdev;
1880
Philipp Reisnerb411b362009-09-25 16:07:19 -07001881 init_timer(&mdev->resync_timer);
1882 init_timer(&mdev->md_sync_timer);
Philipp Reisner370a43e2011-01-14 16:03:11 +01001883 init_timer(&mdev->start_resync_timer);
Philipp Reisner7fde2be2011-03-01 11:08:28 +01001884 init_timer(&mdev->request_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001885 mdev->resync_timer.function = resync_timer_fn;
1886 mdev->resync_timer.data = (unsigned long) mdev;
1887 mdev->md_sync_timer.function = md_sync_timer_fn;
1888 mdev->md_sync_timer.data = (unsigned long) mdev;
Philipp Reisner370a43e2011-01-14 16:03:11 +01001889 mdev->start_resync_timer.function = start_resync_timer_fn;
1890 mdev->start_resync_timer.data = (unsigned long) mdev;
Philipp Reisner7fde2be2011-03-01 11:08:28 +01001891 mdev->request_timer.function = request_timer_fn;
1892 mdev->request_timer.data = (unsigned long) mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001893
1894 init_waitqueue_head(&mdev->misc_wait);
1895 init_waitqueue_head(&mdev->state_wait);
1896 init_waitqueue_head(&mdev->ee_wait);
1897 init_waitqueue_head(&mdev->al_wait);
1898 init_waitqueue_head(&mdev->seq_wait);
1899
Philipp Reisnerfd340c12011-01-19 16:57:39 +01001900 /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */
Philipp Reisner2451fc32010-08-24 13:43:11 +02001901 mdev->write_ordering = WO_bdev_flush;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001902 mdev->resync_wenr = LC_FREE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02001903 mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
1904 mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001905}
1906
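/* Reset a device object to (almost) the state it had right after
 * drbd_init_set_defaults(): complain if the receiver thread is still
 * running, zero all transfer/resync counters, shrink the bitmap to zero
 * capacity, release resources and re-apply the default state.  Only called
 * while no other thread can be using the device. */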
1907void drbd_mdev_cleanup(struct drbd_conf *mdev)
1908{
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001909 int i;
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01001910 if (mdev->tconn->receiver.t_state != NONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001911 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01001912 mdev->tconn->receiver.t_state);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001913
1914 /* no need to lock it, I'm the only thread alive */
1915 if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
1916 dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
1917 mdev->al_writ_cnt =
1918 mdev->bm_writ_cnt =
1919 mdev->read_cnt =
1920 mdev->recv_cnt =
1921 mdev->send_cnt =
1922 mdev->writ_cnt =
1923 mdev->p_size =
1924 mdev->rs_start =
1925 mdev->rs_total =
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001926 mdev->rs_failed = 0;
1927 mdev->rs_last_events = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001928 mdev->rs_last_sect_ev = 0;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001929 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1930 mdev->rs_mark_left[i] = 0;
1931 mdev->rs_mark_time[i] = 0;
1932 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01001933 D_ASSERT(mdev->tconn->net_conf == NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001934
1935 drbd_set_my_capacity(mdev, 0);
1936 if (mdev->bitmap) {
1937 /* maybe never allocated. */
Philipp Reisner02d9a942010-03-24 16:23:03 +01001938 drbd_bm_resize(mdev, 0, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001939 drbd_bm_cleanup(mdev);
1940 }
1941
1942 drbd_free_resources(mdev);
Philipp Reisner07782862010-08-31 12:00:50 +02001943 clear_bit(AL_SUSPENDED, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001944
1945 /*
1946 * currently we drbd_init_ee only on module load, so
1947 * we may do drbd_release_ee only on module unload!
1948 */
1949 D_ASSERT(list_empty(&mdev->active_ee));
1950 D_ASSERT(list_empty(&mdev->sync_ee));
1951 D_ASSERT(list_empty(&mdev->done_ee));
1952 D_ASSERT(list_empty(&mdev->read_ee));
1953 D_ASSERT(list_empty(&mdev->net_ee));
1954 D_ASSERT(list_empty(&mdev->resync_reads));
Philipp Reisnere42325a2011-01-19 13:55:45 +01001955 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
1956 D_ASSERT(list_empty(&mdev->tconn->meta.work.q));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001957 D_ASSERT(list_empty(&mdev->resync_work.list));
1958 D_ASSERT(list_empty(&mdev->unplug_work.list));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001959 D_ASSERT(list_empty(&mdev->go_diskless.list));
Lars Ellenberg2265b472010-12-16 15:41:26 +01001960
1961 drbd_set_defaults(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001962}
1963
1964
1965static void drbd_destroy_mempools(void)
1966{
1967 struct page *page;
1968
1969 while (drbd_pp_pool) {
1970 page = drbd_pp_pool;
1971 drbd_pp_pool = (struct page *)page_private(page);
1972 __free_page(page);
1973 drbd_pp_vacant--;
1974 }
1975
1976 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
1977
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001978 if (drbd_md_io_bio_set)
1979 bioset_free(drbd_md_io_bio_set);
Lars Ellenberg35abf592011-02-23 12:39:46 +01001980 if (drbd_md_io_page_pool)
1981 mempool_destroy(drbd_md_io_page_pool);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001982 if (drbd_ee_mempool)
1983 mempool_destroy(drbd_ee_mempool);
1984 if (drbd_request_mempool)
1985 mempool_destroy(drbd_request_mempool);
1986 if (drbd_ee_cache)
1987 kmem_cache_destroy(drbd_ee_cache);
1988 if (drbd_request_cache)
1989 kmem_cache_destroy(drbd_request_cache);
1990 if (drbd_bm_ext_cache)
1991 kmem_cache_destroy(drbd_bm_ext_cache);
1992 if (drbd_al_ext_cache)
1993 kmem_cache_destroy(drbd_al_ext_cache);
1994
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001995 drbd_md_io_bio_set = NULL;
Lars Ellenberg35abf592011-02-23 12:39:46 +01001996 drbd_md_io_page_pool = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001997 drbd_ee_mempool = NULL;
1998 drbd_request_mempool = NULL;
1999 drbd_ee_cache = NULL;
2000 drbd_request_cache = NULL;
2001 drbd_bm_ext_cache = NULL;
2002 drbd_al_ext_cache = NULL;
2003
2004 return;
2005}
2006
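/* Allocate the global slab caches and mempools.  The page pool and the
 * request/ee mempools are sized to (DRBD_MAX_BIO_SIZE / PAGE_SIZE) pages
 * per configured minor, i.e. roughly enough to keep one maximum-sized bio
 * in flight per device without falling back to the allocator.  On any
 * failure, whatever was already set up is torn down and -ENOMEM returned. */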
2007static int drbd_create_mempools(void)
2008{
2009 struct page *page;
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002010 const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002011 int i;
2012
2013 /* prepare our caches and mempools */
2014 drbd_request_mempool = NULL;
2015 drbd_ee_cache = NULL;
2016 drbd_request_cache = NULL;
2017 drbd_bm_ext_cache = NULL;
2018 drbd_al_ext_cache = NULL;
2019 drbd_pp_pool = NULL;
Lars Ellenberg35abf592011-02-23 12:39:46 +01002020 drbd_md_io_page_pool = NULL;
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002021 drbd_md_io_bio_set = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002022
2023 /* caches */
2024 drbd_request_cache = kmem_cache_create(
2025 "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
2026 if (drbd_request_cache == NULL)
2027 goto Enomem;
2028
2029 drbd_ee_cache = kmem_cache_create(
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01002030 "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002031 if (drbd_ee_cache == NULL)
2032 goto Enomem;
2033
2034 drbd_bm_ext_cache = kmem_cache_create(
2035 "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
2036 if (drbd_bm_ext_cache == NULL)
2037 goto Enomem;
2038
2039 drbd_al_ext_cache = kmem_cache_create(
2040 "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
2041 if (drbd_al_ext_cache == NULL)
2042 goto Enomem;
2043
2044 /* mempools */
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01002045 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
2046 if (drbd_md_io_bio_set == NULL)
2047 goto Enomem;
2048
Lars Ellenberg35abf592011-02-23 12:39:46 +01002049 drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
2050 if (drbd_md_io_page_pool == NULL)
2051 goto Enomem;
2052
Philipp Reisnerb411b362009-09-25 16:07:19 -07002053 drbd_request_mempool = mempool_create(number,
2054 mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
2055 if (drbd_request_mempool == NULL)
2056 goto Enomem;
2057
2058 drbd_ee_mempool = mempool_create(number,
2059 mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
Nicolas Kaiser2027ae12010-10-28 06:15:26 -06002060 if (drbd_ee_mempool == NULL)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002061 goto Enomem;
2062
2063 /* drbd's page pool */
2064 spin_lock_init(&drbd_pp_lock);
2065
2066 for (i = 0; i < number; i++) {
2067 page = alloc_page(GFP_HIGHUSER);
2068 if (!page)
2069 goto Enomem;
2070 set_page_private(page, (unsigned long)drbd_pp_pool);
2071 drbd_pp_pool = page;
2072 }
2073 drbd_pp_vacant = number;
2074
2075 return 0;
2076
2077Enomem:
2078 drbd_destroy_mempools(); /* in case we allocated some */
2079 return -ENOMEM;
2080}
2081
2082static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
2083 void *unused)
2084{
2085 /* just so we have it. you never know what interesting things we
2086 * might want to do here some day...
2087 */
2088
2089 return NOTIFY_DONE;
2090}
2091
2092static struct notifier_block drbd_notifier = {
2093 .notifier_call = drbd_notify_sys,
2094};
2095
2096static void drbd_release_ee_lists(struct drbd_conf *mdev)
2097{
2098 int rr;
2099
2100 rr = drbd_release_ee(mdev, &mdev->active_ee);
2101 if (rr)
2102 dev_err(DEV, "%d EEs in active list found!\n", rr);
2103
2104 rr = drbd_release_ee(mdev, &mdev->sync_ee);
2105 if (rr)
2106 dev_err(DEV, "%d EEs in sync list found!\n", rr);
2107
2108 rr = drbd_release_ee(mdev, &mdev->read_ee);
2109 if (rr)
2110 dev_err(DEV, "%d EEs in read list found!\n", rr);
2111
2112 rr = drbd_release_ee(mdev, &mdev->done_ee);
2113 if (rr)
2114 dev_err(DEV, "%d EEs in done list found!\n", rr);
2115
2116 rr = drbd_release_ee(mdev, &mdev->net_ee);
2117 if (rr)
2118 dev_err(DEV, "%d EEs in net list found!\n", rr);
2119}
2120
Philipp Reisner774b3052011-02-22 02:07:03 -05002121/* caution. no locking. */
2122void drbd_delete_device(unsigned int minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002123{
2124 struct drbd_conf *mdev = minor_to_mdev(minor);
2125
2126 if (!mdev)
2127 return;
2128
Lars Ellenberg569083c2011-03-07 09:49:02 +01002129 idr_remove(&mdev->tconn->volumes, mdev->vnr);
2130 idr_remove(&minors, minor);
2131 synchronize_rcu();
Philipp Reisner774b3052011-02-22 02:07:03 -05002132
Philipp Reisnerb411b362009-09-25 16:07:19 -07002133 /* paranoia asserts */
Andreas Gruenbacher70dc65e2010-12-21 14:46:57 +01002134 D_ASSERT(mdev->open_cnt == 0);
Philipp Reisnere42325a2011-01-19 13:55:45 +01002135 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002136 /* end paranoia asserts */
2137
2138 del_gendisk(mdev->vdisk);
2139
2140 /* cleanup stuff that may have been allocated during
2141 * device (re-)configuration or state changes */
2142
2143 if (mdev->this_bdev)
2144 bdput(mdev->this_bdev);
2145
2146 drbd_free_resources(mdev);
2147
2148 drbd_release_ee_lists(mdev);
2149
Philipp Reisnerb411b362009-09-25 16:07:19 -07002150 lc_destroy(mdev->act_log);
2151 lc_destroy(mdev->resync);
2152
2153 kfree(mdev->p_uuid);
2154 /* mdev->p_uuid = NULL; */
2155
Philipp Reisnerb411b362009-09-25 16:07:19 -07002156 /* cleanup the rest that has been
2157 * allocated from drbd_new_device
2158 * and actually free the mdev itself */
2159 drbd_free_mdev(mdev);
2160}
2161
2162static void drbd_cleanup(void)
2163{
2164 unsigned int i;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002165 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002166
2167 unregister_reboot_notifier(&drbd_notifier);
2168
Lars Ellenberg17a93f32010-11-24 10:37:35 +01002169 /* first remove proc,
2170	 * drbdsetup uses its presence to detect
2171 * whether DRBD is loaded.
2172	 * If we were to get stuck in proc removal,
2173 * but have netlink already deregistered,
2174 * some drbdsetup commands may wait forever
2175 * for an answer.
2176 */
2177 if (drbd_proc)
2178 remove_proc_entry("drbd", NULL);
2179
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002180 drbd_genl_unregister();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002181
Philipp Reisner81a5d602011-02-22 19:53:16 -05002182 idr_for_each_entry(&minors, mdev, i)
2183 drbd_delete_device(i);
2184 drbd_destroy_mempools();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002185 unregister_blkdev(DRBD_MAJOR, "drbd");
2186
Philipp Reisner81a5d602011-02-22 19:53:16 -05002187 idr_destroy(&minors);
2188
Philipp Reisnerb411b362009-09-25 16:07:19 -07002189 printk(KERN_INFO "drbd: module cleanup done.\n");
2190}
2191
2192/**
2193 * drbd_congested() - Callback for pdflush
2194 * @congested_data: User data
2195 * @bdi_bits: Bits pdflush is currently interested in
2196 *
2197 * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
2198 */
2199static int drbd_congested(void *congested_data, int bdi_bits)
2200{
2201 struct drbd_conf *mdev = congested_data;
2202 struct request_queue *q;
2203 char reason = '-';
2204 int r = 0;
2205
Andreas Gruenbacher1b881ef2010-12-13 18:03:38 +01002206 if (!may_inc_ap_bio(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002207 /* DRBD has frozen IO */
2208 r = bdi_bits;
2209 reason = 'd';
2210 goto out;
2211 }
2212
2213 if (get_ldev(mdev)) {
2214 q = bdev_get_queue(mdev->ldev->backing_bdev);
2215 r = bdi_congested(&q->backing_dev_info, bdi_bits);
2216 put_ldev(mdev);
2217 if (r)
2218 reason = 'b';
2219 }
2220
Philipp Reisner01a311a2011-02-07 14:30:33 +01002221 if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002222 r |= (1 << BDI_async_congested);
2223 reason = reason == 'b' ? 'a' : 'n';
2224 }
2225
2226out:
2227 mdev->congestion_reason = reason;
2228 return r;
2229}
2230
Philipp Reisner6699b652011-02-09 11:10:24 +01002231static void drbd_init_workqueue(struct drbd_work_queue* wq)
2232{
2233 sema_init(&wq->s, 0);
2234 spin_lock_init(&wq->q_lock);
2235 INIT_LIST_HEAD(&wq->q);
2236}
2237
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002238struct drbd_tconn *conn_by_name(const char *name)
2239{
2240 struct drbd_tconn *tconn;
2241
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002242 if (!name || !name[0])
2243 return NULL;
2244
Lars Ellenberg543cc102011-03-10 22:18:18 +01002245 mutex_lock(&drbd_cfg_mutex);
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002246 list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
2247 if (!strcmp(tconn->name, name))
2248 goto found;
2249 }
2250 tconn = NULL;
2251found:
Lars Ellenberg543cc102011-03-10 22:18:18 +01002252 mutex_unlock(&drbd_cfg_mutex);
Philipp Reisner1aba4d72011-02-21 15:38:08 +01002253 return tconn;
2254}
2255
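/* Allocate and initialise a new connection object: name, cpu mask, transfer
 * log, the data/meta work queues and their mutexes, the volumes idr, and the
 * receiver/worker/asender thread descriptors.  The new tconn is linked into
 * the global drbd_tconns list under drbd_cfg_mutex.  Returns NULL on any
 * allocation failure. */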
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002256struct drbd_tconn *drbd_new_tconn(const char *name)
Philipp Reisner21114382011-01-19 12:26:59 +01002257{
2258 struct drbd_tconn *tconn;
2259
2260 tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL);
2261 if (!tconn)
2262 return NULL;
2263
2264 tconn->name = kstrdup(name, GFP_KERNEL);
2265 if (!tconn->name)
2266 goto fail;
2267
Philipp Reisner774b3052011-02-22 02:07:03 -05002268 if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
2269 goto fail;
2270
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01002271 if (!tl_init(tconn))
2272 goto fail;
2273
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01002274 tconn->cstate = C_STANDALONE;
Philipp Reisner8410da8f02011-02-11 20:11:10 +01002275 mutex_init(&tconn->cstate_mutex);
Philipp Reisner6699b652011-02-09 11:10:24 +01002276 spin_lock_init(&tconn->req_lock);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01002277 atomic_set(&tconn->net_cnt, 0);
2278 init_waitqueue_head(&tconn->net_cnt_wait);
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01002279 init_waitqueue_head(&tconn->ping_wait);
Philipp Reisner062e8792011-02-08 11:09:18 +01002280 idr_init(&tconn->volumes);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01002281
Philipp Reisner6699b652011-02-09 11:10:24 +01002282 drbd_init_workqueue(&tconn->data.work);
2283 mutex_init(&tconn->data.mutex);
2284
2285 drbd_init_workqueue(&tconn->meta.work);
2286 mutex_init(&tconn->meta.mutex);
2287
Philipp Reisner392c8802011-02-09 10:33:31 +01002288 drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver");
2289 drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
2290 drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
2291
Lars Ellenbergf3990022011-03-23 14:31:09 +01002292 tconn->res_opts = (struct res_opts) {
2293 {}, 0, /* cpu_mask */
2294 DRBD_ON_NO_DATA_DEF, /* on_no_data */
2295 };
2296
Lars Ellenberg543cc102011-03-10 22:18:18 +01002297 mutex_lock(&drbd_cfg_mutex);
2298 list_add_tail(&tconn->all_tconn, &drbd_tconns);
2299 mutex_unlock(&drbd_cfg_mutex);
Philipp Reisner21114382011-01-19 12:26:59 +01002300
2301 return tconn;
2302
2303fail:
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01002304 tl_cleanup(tconn);
Philipp Reisner774b3052011-02-22 02:07:03 -05002305 free_cpumask_var(tconn->cpu_mask);
Philipp Reisner21114382011-01-19 12:26:59 +01002306 kfree(tconn->name);
2307 kfree(tconn);
2308
2309 return NULL;
2310}
2311
2312void drbd_free_tconn(struct drbd_tconn *tconn)
2313{
Philipp Reisner21114382011-01-19 12:26:59 +01002314 list_del(&tconn->all_tconn);
Philipp Reisner062e8792011-02-08 11:09:18 +01002315 idr_destroy(&tconn->volumes);
Philipp Reisner21114382011-01-19 12:26:59 +01002316
Philipp Reisner774b3052011-02-22 02:07:03 -05002317 free_cpumask_var(tconn->cpu_mask);
Philipp Reisner21114382011-01-19 12:26:59 +01002318 kfree(tconn->name);
Philipp Reisnerb42a70a2011-01-27 10:55:20 +01002319 kfree(tconn->int_dig_out);
2320 kfree(tconn->int_dig_in);
2321 kfree(tconn->int_dig_vv);
Philipp Reisner21114382011-01-19 12:26:59 +01002322 kfree(tconn);
2323}
2324
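/* Create a new minor (volume) on the given connection: allocate the
 * drbd_conf, its request queue, gendisk, meta-data io page, bitmap and
 * current epoch, then register it both in the global minors idr and in the
 * connection's volumes idr.  If the connection is already established
 * (C_WF_REPORT_PARAMS), the new device is attached to it right away via
 * drbd_connected(). */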
Philipp Reisner774b3052011-02-22 02:07:03 -05002325enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002326{
2327 struct drbd_conf *mdev;
2328 struct gendisk *disk;
2329 struct request_queue *q;
Philipp Reisner774b3052011-02-22 02:07:03 -05002330 int vnr_got = vnr;
Philipp Reisner81a5d602011-02-22 19:53:16 -05002331 int minor_got = minor;
Lars Ellenberg8432b312011-03-08 16:11:16 +01002332 enum drbd_ret_code err = ERR_NOMEM;
Philipp Reisner774b3052011-02-22 02:07:03 -05002333
2334 mdev = minor_to_mdev(minor);
2335 if (mdev)
2336 return ERR_MINOR_EXISTS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002337
2338 /* GFP_KERNEL, we are outside of all write-out paths */
2339 mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
2340 if (!mdev)
Philipp Reisner774b3052011-02-22 02:07:03 -05002341 return ERR_NOMEM;
2342
2343 mdev->tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002344 mdev->minor = minor;
Lars Ellenberg3b98c0c2011-03-07 12:49:34 +01002345 mdev->vnr = vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002346
2347 drbd_init_set_defaults(mdev);
2348
2349 q = blk_alloc_queue(GFP_KERNEL);
2350 if (!q)
2351 goto out_no_q;
2352 mdev->rq_queue = q;
2353 q->queuedata = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002354
2355 disk = alloc_disk(1);
2356 if (!disk)
2357 goto out_no_disk;
2358 mdev->vdisk = disk;
2359
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002360 set_disk_ro(disk, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002361
2362 disk->queue = q;
2363 disk->major = DRBD_MAJOR;
2364 disk->first_minor = minor;
2365 disk->fops = &drbd_ops;
2366 sprintf(disk->disk_name, "drbd%d", minor);
2367 disk->private_data = mdev;
2368
2369 mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
2370 /* we have no partitions. we contain only ourselves. */
2371 mdev->this_bdev->bd_contains = mdev->this_bdev;
2372
2373 q->backing_dev_info.congested_fn = drbd_congested;
2374 q->backing_dev_info.congested_data = mdev;
2375
Andreas Gruenbacher2f58dcf2010-12-13 17:48:19 +01002376 blk_queue_make_request(q, drbd_make_request);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002377	/* Setting the max_hw_sectors to an odd value of 8 KiB here
2378	   triggers a max_bio_size message upon first attach or connect. */
2379 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002380 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
2381 blk_queue_merge_bvec(q, drbd_merge_bvec);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002382 q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002383
2384 mdev->md_io_page = alloc_page(GFP_KERNEL);
2385 if (!mdev->md_io_page)
2386 goto out_no_io_page;
2387
2388 if (drbd_bm_init(mdev))
2389 goto out_no_bitmap;
Andreas Gruenbacherdac13892011-01-21 17:18:39 +01002390 mdev->read_requests = RB_ROOT;
Andreas Gruenbacherde696712011-01-20 15:00:24 +01002391 mdev->write_requests = RB_ROOT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002392
Philipp Reisnerb411b362009-09-25 16:07:19 -07002393 mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
2394 if (!mdev->current_epoch)
2395 goto out_no_epoch;
2396
2397 INIT_LIST_HEAD(&mdev->current_epoch->list);
2398 mdev->epochs = 1;
2399
Lars Ellenberg8432b312011-03-08 16:11:16 +01002400 if (!idr_pre_get(&minors, GFP_KERNEL))
2401 goto out_no_minor_idr;
2402 if (idr_get_new_above(&minors, mdev, minor, &minor_got))
2403 goto out_no_minor_idr;
2404 if (minor_got != minor) {
2405 err = ERR_MINOR_EXISTS;
2406 drbd_msg_put_info("requested minor exists already");
2407 goto out_idr_remove_minor;
Lars Ellenberg569083c2011-03-07 09:49:02 +01002408 }
2409
Lars Ellenberg8432b312011-03-08 16:11:16 +01002410 if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
Lars Ellenberg569083c2011-03-07 09:49:02 +01002411 goto out_idr_remove_minor;
Lars Ellenberg8432b312011-03-08 16:11:16 +01002412 if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got))
2413 goto out_idr_remove_minor;
2414 if (vnr_got != vnr) {
2415 err = ERR_INVALID_REQUEST;
		drbd_msg_put_info("requested volume exists already");
		goto out_idr_remove_vol;
	}
	add_disk(disk);

	/* inherit the connection state */
	mdev->state.conn = tconn->cstate;
	if (mdev->state.conn == C_WF_REPORT_PARAMS)
		drbd_connected(vnr, mdev, tconn);

	return NO_ERROR;

out_idr_remove_vol:
	idr_remove(&tconn->volumes, vnr_got);
out_idr_remove_minor:
	idr_remove(&minors, minor_got);
	synchronize_rcu();
out_no_minor_idr:
	kfree(mdev->current_epoch);
out_no_epoch:
	drbd_bm_cleanup(mdev);
out_no_bitmap:
	__free_page(mdev->md_io_page);
out_no_io_page:
	put_disk(disk);
out_no_disk:
	blk_cleanup_queue(q);
out_no_q:
	kfree(mdev);
	return err;
}

/* counterpart of drbd_new_device.
 * last part of drbd_delete_device. */
void drbd_free_mdev(struct drbd_conf *mdev)
{
	kfree(mdev->current_epoch);
	if (mdev->bitmap) /* should no longer be there. */
		drbd_bm_cleanup(mdev);
	__free_page(mdev->md_io_page);
	put_disk(mdev->vdisk);
	blk_cleanup_queue(mdev->rq_queue);
	kfree(mdev);
}


int __init drbd_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95));
	BUILD_BUG_ON(sizeof(struct p_handshake) != 80);

	if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
		printk(KERN_ERR
			"drbd: invalid minor_count (%d)\n", minor_count);
#ifdef MODULE
		return -EINVAL;
#else
		minor_count = 8;
#endif
	}

	err = register_blkdev(DRBD_MAJOR, "drbd");
	if (err) {
		printk(KERN_ERR
			"drbd: unable to register block device major %d\n",
			DRBD_MAJOR);
		return err;
	}

	err = drbd_genl_register();
	if (err) {
		printk(KERN_ERR "drbd: unable to register generic netlink family\n");
		goto fail;
	}


	register_reboot_notifier(&drbd_notifier);

	/*
	 * allocate all necessary structs
	 */
	err = -ENOMEM;

	init_waitqueue_head(&drbd_pp_wait);

	drbd_proc = NULL; /* play safe for drbd_cleanup */
	idr_init(&minors);

	err = drbd_create_mempools();
	if (err)
		goto fail;

	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO, NULL, &drbd_proc_fops, NULL);
	if (!drbd_proc) {
		printk(KERN_ERR "drbd: unable to register proc file\n");
		goto fail;
	}

	rwlock_init(&global_state_lock);
	INIT_LIST_HEAD(&drbd_tconns);

	printk(KERN_INFO "drbd: initialized. "
	       "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
	       API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
	printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
	printk(KERN_INFO "drbd: registered as block device major %d\n",
		DRBD_MAJOR);

	return 0; /* Success! */

fail:
	drbd_cleanup();
	if (err == -ENOMEM)
		/* currently always the case */
		printk(KERN_ERR "drbd: ran out of memory\n");
	else
		printk(KERN_ERR "drbd: initialization failure\n");
	return err;
}
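
/*
 * Illustrative usage note (not part of the original source): the minor_count
 * bounds check in drbd_init() above matters at module load time.  Assuming
 * minor_count is exposed as a module parameter of the same name, loading the
 * module from user space could look like:
 *
 *	# modprobe drbd minor_count=16
 *
 * An out-of-range value is rejected with -EINVAL when drbd is built as a
 * module, and silently reset to 8 when it is built in.
 */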

void drbd_free_bc(struct drbd_backing_dev *ldev)
{
	if (ldev == NULL)
		return;

	blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
	blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	kfree(ldev);
}

void drbd_free_sock(struct drbd_tconn *tconn)
{
	if (tconn->data.socket) {
		mutex_lock(&tconn->data.mutex);
		kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR);
		sock_release(tconn->data.socket);
		tconn->data.socket = NULL;
		mutex_unlock(&tconn->data.mutex);
	}
	if (tconn->meta.socket) {
		mutex_lock(&tconn->meta.mutex);
		kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR);
		sock_release(tconn->meta.socket);
		tconn->meta.socket = NULL;
		mutex_unlock(&tconn->meta.mutex);
	}
}


void drbd_free_resources(struct drbd_conf *mdev)
{
	crypto_free_hash(mdev->tconn->csums_tfm);
	mdev->tconn->csums_tfm = NULL;
	crypto_free_hash(mdev->tconn->verify_tfm);
	mdev->tconn->verify_tfm = NULL;
	crypto_free_hash(mdev->tconn->cram_hmac_tfm);
	mdev->tconn->cram_hmac_tfm = NULL;
	crypto_free_hash(mdev->tconn->integrity_w_tfm);
	mdev->tconn->integrity_w_tfm = NULL;
	crypto_free_hash(mdev->tconn->integrity_r_tfm);
	mdev->tconn->integrity_r_tfm = NULL;

	drbd_free_sock(mdev->tconn);

	__no_warn(local,
		  drbd_free_bc(mdev->ldev);
		  mdev->ldev = NULL;);
}

/* meta data management */

struct meta_data_on_disk {
	u64 la_size;		/* last agreed size. */
	u64 uuid[UI_SIZE];	/* UUIDs. */
	u64 device_uuid;
	u64 reserved_u64_1;
	u32 flags;		/* MDF */
	u32 magic;
	u32 md_size_sect;
	u32 al_offset;		/* offset to this block */
	u32 al_nr_extents;	/* important for restoring the AL */
		/* `-- act_log->nr_elements <-- ldev->dc.al_extents */
	u32 bm_offset;		/* offset to the bitmap, from here */
	u32 bm_bytes_per_bit;	/* BM_BLOCK_SIZE */
	u32 la_peer_max_bio_size; /* last peer max_bio_size */
	u32 reserved_u32[3];

} __packed;
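
/*
 * Illustrative sketch (not part of the driver): every field of struct
 * meta_data_on_disk is stored big-endian, as the cpu_to_be*()/be*_to_cpu()
 * conversions in drbd_md_sync()/drbd_md_read() below show.  A hypothetical
 * user-space inspector that reads the 512-byte super block at the metadata
 * offset could decode it roughly like this (fd, md_offset_bytes and the
 * glibc be*toh() helpers are assumptions of the sketch, not driver code):
 *
 *	struct meta_data_on_disk md;
 *
 *	if (pread(fd, &md, sizeof(md), md_offset_bytes) != sizeof(md))
 *		return -EIO;
 *	if (be32toh(md.magic) != DRBD_MD_MAGIC)
 *		return -EINVAL;
 *	printf("la_size: %llu sectors, al_extents: %u\n",
 *	       (unsigned long long)be64toh(md.la_size),
 *	       be32toh(md.al_nr_extents));
 */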

/**
 * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
 * @mdev:	DRBD device.
 */
void drbd_md_sync(struct drbd_conf *mdev)
{
	struct meta_data_on_disk *buffer;
	sector_t sector;
	int i;

	del_timer(&mdev->md_sync_timer);
	/* timer may be rearmed by drbd_md_mark_dirty() now. */
	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
		return;

	/* We use here D_FAILED and not D_ATTACHING because we try to write
	 * metadata even if we detach due to a disk failure! */
	if (!get_ldev_if_state(mdev, D_FAILED))
		return;

	mutex_lock(&mdev->md_io_mutex);
	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
	memset(buffer, 0, 512);

	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
	buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);

	buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
	buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
	buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
	buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
	buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);

	buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
	buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);

	D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
	sector = mdev->ldev->md.md_offset;

	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
		/* this was a try anyways ... */
		dev_err(DEV, "meta data update failed!\n");
		drbd_chk_io_error(mdev, 1, true);
	}

	/* Update mdev->ldev->md.la_size_sect,
	 * since we updated it on metadata. */
	mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);

	mutex_unlock(&mdev->md_io_mutex);
	put_ldev(mdev);
}

/**
 * drbd_md_read() - Reads in the meta data super block
 * @mdev:	DRBD device.
 * @bdev:	Device from which the meta data should be read in.
 *
 * Returns NO_ERROR (0) on success, or an enum drbd_ret_code when something
 * goes wrong; currently only ERR_IO_MD_DISK or ERR_MD_INVALID.
 */
int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
{
	struct meta_data_on_disk *buffer;
	int i, rv = NO_ERROR;

	if (!get_ldev_if_state(mdev, D_ATTACHING))
		return ERR_IO_MD_DISK;

	mutex_lock(&mdev->md_io_mutex);
	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);

	if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
		/* NOTE: can't do normal error processing here as this is
		   called BEFORE disk is attached */
		dev_err(DEV, "Error while reading metadata.\n");
		rv = ERR_IO_MD_DISK;
		goto err;
	}

	if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) {
		dev_err(DEV, "Error while reading metadata, magic not found.\n");
		rv = ERR_MD_INVALID;
		goto err;
	}
	if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
		dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
			be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
		rv = ERR_MD_INVALID;
		goto err;
	}
	if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
		dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
			be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
		rv = ERR_MD_INVALID;
		goto err;
	}
	if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
		dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
			be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
		rv = ERR_MD_INVALID;
		goto err;
	}

	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
			be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
		rv = ERR_MD_INVALID;
		goto err;
	}

	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
	bdev->md.flags = be32_to_cpu(buffer->flags);
	bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents);
	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);

	spin_lock_irq(&mdev->tconn->req_lock);
	if (mdev->state.conn < C_CONNECTED) {
		int peer;
		peer = be32_to_cpu(buffer->la_peer_max_bio_size);
		peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
		mdev->peer_max_bio_size = peer;
	}
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (bdev->dc.al_extents < 7)
		bdev->dc.al_extents = 127;

 err:
	mutex_unlock(&mdev->md_io_mutex);
	put_ldev(mdev);

	return rv;
}

/**
 * drbd_md_mark_dirty() - Mark meta data super block as dirty
 * @mdev:	DRBD device.
 *
 * Call this function if you change anything that should be written to
 * the meta-data super block. This function sets MD_DIRTY, and starts a
 * timer that ensures drbd_md_sync() is called within five seconds.
 */
#ifdef DEBUG
void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
{
	if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
		mod_timer(&mdev->md_sync_timer, jiffies + HZ);
		mdev->last_md_mark_dirty.line = line;
		mdev->last_md_mark_dirty.func = func;
	}
}
#else
void drbd_md_mark_dirty(struct drbd_conf *mdev)
{
	if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
		mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
}
#endif
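
/*
 * Illustrative note (not from the original source): callers follow a
 * "mark now, write soon" pattern.  A metadata change is paired with
 * drbd_md_mark_dirty(); the actual write happens either through an explicit
 * drbd_md_sync() or through the timer armed above, which makes the worker
 * call drbd_md_sync() via md_sync_timer_fn()/w_md_sync() further down.
 * A hypothetical caller therefore looks roughly like:
 *
 *	mdev->ldev->md.uuid[UI_CURRENT] |= 1;	// some metadata change
 *	drbd_md_mark_dirty(mdev);		// schedule a write within 5s
 *	...
 *	drbd_md_sync(mdev);	// optional; writes only while MD_DIRTY is set
 */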

static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
{
	int i;

	for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
		mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
}

void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
{
	if (idx == UI_CURRENT) {
		if (mdev->state.role == R_PRIMARY)
			val |= 1;
		else
			val &= ~((u64)1);

		drbd_set_ed_uuid(mdev, val);
	}

	mdev->ldev->md.uuid[idx] = val;
	drbd_md_mark_dirty(mdev);
}


void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
{
	if (mdev->ldev->md.uuid[idx]) {
		drbd_uuid_move_history(mdev);
		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
	}
	_drbd_uuid_set(mdev, idx, val);
}

/**
 * drbd_uuid_new_current() - Creates a new current UUID
 * @mdev:	DRBD device.
 *
 * Creates a new current UUID, and rotates the old current UUID into
 * the bitmap slot. Causes an incremental resync upon next connect.
 */
void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
{
	u64 val;
	unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];

	if (bm_uuid)
		dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);

	mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];

	get_random_bytes(&val, sizeof(u64));
	_drbd_uuid_set(mdev, UI_CURRENT, val);
	drbd_print_uuids(mdev, "new current UUID");
	/* get it to stable storage _now_ */
	drbd_md_sync(mdev);
}
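
/*
 * Worked example (UUID values invented purely for illustration): if the
 * UUID set before the call is
 *	UI_CURRENT = 0x1111, UI_BITMAP = 0, UI_HISTORY_START = 0x0f0f
 * then after drbd_uuid_new_current() it becomes
 *	UI_CURRENT = <fresh random value>, UI_BITMAP = 0x1111,
 *	UI_HISTORY_START = 0x0f0f
 * i.e. the old current UUID moves into the bitmap slot, which is what lets
 * the peer recognize on reconnect that a bitmap-based (incremental) resync
 * is sufficient.
 */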

void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
{
	if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
		return;

	if (val == 0) {
		drbd_uuid_move_history(mdev);
		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
		mdev->ldev->md.uuid[UI_BITMAP] = 0;
	} else {
		unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
		if (bm_uuid)
			dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);

		mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
	}
	drbd_md_mark_dirty(mdev);
}

/**
 * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
 * @mdev:	DRBD device.
 *
 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
 */
int drbd_bmio_set_n_write(struct drbd_conf *mdev)
{
	int rv = -EIO;

	if (get_ldev_if_state(mdev, D_ATTACHING)) {
		drbd_md_set_flag(mdev, MDF_FULL_SYNC);
		drbd_md_sync(mdev);
		drbd_bm_set_all(mdev);

		rv = drbd_bm_write(mdev);

		if (!rv) {
			drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
			drbd_md_sync(mdev);
		}

		put_ldev(mdev);
	}

	return rv;
}

/**
 * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
 * @mdev:	DRBD device.
 *
 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
 */
int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
{
	int rv = -EIO;

	drbd_resume_al(mdev);
	if (get_ldev_if_state(mdev, D_ATTACHING)) {
		drbd_bm_clear_all(mdev);
		rv = drbd_bm_write(mdev);
		put_ldev(mdev);
	}

	return rv;
}
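
/*
 * Illustrative note: the two helpers above are intended as the io_fn
 * argument of drbd_bitmap_io()/drbd_queue_bitmap_io() further down, e.g.
 * (the why string and flags value are chosen for illustration only):
 *
 *	drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
 *		       "set_n_write from attaching", BM_LOCKED_MASK);
 */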

static int w_bitmap_io(struct drbd_work *w, int unused)
{
	struct bm_io_work *work = container_of(w, struct bm_io_work, w);
	struct drbd_conf *mdev = w->mdev;
	int rv = -EIO;

	D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);

	if (get_ldev(mdev)) {
		drbd_bm_lock(mdev, work->why, work->flags);
		rv = work->io_fn(mdev);
		drbd_bm_unlock(mdev);
		put_ldev(mdev);
	}

	clear_bit_unlock(BITMAP_IO, &mdev->flags);
	wake_up(&mdev->misc_wait);

	if (work->done)
		work->done(mdev, rv);

	clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
	work->why = NULL;
	work->flags = 0;

	return 1;
}

void drbd_ldev_destroy(struct drbd_conf *mdev)
{
	lc_destroy(mdev->resync);
	mdev->resync = NULL;
	lc_destroy(mdev->act_log);
	mdev->act_log = NULL;
	__no_warn(local,
		  drbd_free_bc(mdev->ldev);
		  mdev->ldev = NULL;);

	clear_bit(GO_DISKLESS, &mdev->flags);
}

static int w_go_diskless(struct drbd_work *w, int unused)
{
	struct drbd_conf *mdev = w->mdev;

	D_ASSERT(mdev->state.disk == D_FAILED);
	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
	 * the protected members anymore, though, so once put_ldev reaches zero
	 * again, it will be safe to free them. */
	drbd_force_state(mdev, NS(disk, D_DISKLESS));
	return 1;
}

void drbd_go_diskless(struct drbd_conf *mdev)
{
	D_ASSERT(mdev->state.disk == D_FAILED);
	if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
		drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless);
}

/**
 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
 * @mdev:	DRBD device.
 * @io_fn:	IO callback to be called when bitmap IO is possible
 * @done:	callback to be called after the bitmap IO was performed
 * @why:	Descriptive text of the reason for doing the IO
 * @flags:	bitmap locking flags (enum bm_flag), passed on to drbd_bm_lock()
 *
 * While IO on the bitmap is in progress, application IO is frozen; this
 * ensures that drbd_set_out_of_sync() cannot be called. This function MAY
 * ONLY be called from worker context. It MUST NOT be used while a previous
 * such work is still pending!
 */
void drbd_queue_bitmap_io(struct drbd_conf *mdev,
			  int (*io_fn)(struct drbd_conf *),
			  void (*done)(struct drbd_conf *, int),
			  char *why, enum bm_flag flags)
{
	D_ASSERT(current == mdev->tconn->worker.task);

	D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
	D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
	D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
	if (mdev->bm_io_work.why)
		dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
			why, mdev->bm_io_work.why);

	mdev->bm_io_work.io_fn = io_fn;
	mdev->bm_io_work.done = done;
	mdev->bm_io_work.why = why;
	mdev->bm_io_work.flags = flags;

	spin_lock_irq(&mdev->tconn->req_lock);
	set_bit(BITMAP_IO, &mdev->flags);
	if (atomic_read(&mdev->ap_bio_cnt) == 0) {
		if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
			drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w);
	}
	spin_unlock_irq(&mdev->tconn->req_lock);
}
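
/*
 * Usage sketch (not from the original source; the done callback name and
 * the why/flags values are invented for illustration): from worker context,
 * queueing a full-bitmap write could look like this.
 *
 *	static void after_set_n_write(struct drbd_conf *mdev, int rv)
 *	{
 *		if (rv)
 *			dev_err(DEV, "bitmap write failed\n");
 *	}
 *	...
 *	drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, after_set_n_write,
 *			     "set_n_write", BM_LOCKED_MASK);
 *
 * The actual IO then runs in w_bitmap_io() above, once ap_bio_cnt drops to
 * zero, and the done callback reports the io_fn result.
 */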

/**
 * drbd_bitmap_io() - Does an IO operation on the whole bitmap
 * @mdev:	DRBD device.
 * @io_fn:	IO callback to be called when bitmap IO is possible
 * @why:	Descriptive text of the reason for doing the IO
 * @flags:	bitmap locking flags (enum bm_flag)
 *
 * Freezes application IO while the actual IO operation runs. This function
 * MAY NOT be called from worker context.
 */
int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
		   char *why, enum bm_flag flags)
{
	int rv;

	D_ASSERT(current != mdev->tconn->worker.task);

	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
		drbd_suspend_io(mdev);

	drbd_bm_lock(mdev, why, flags);
	rv = io_fn(mdev);
	drbd_bm_unlock(mdev);

	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
		drbd_resume_io(mdev);

	return rv;
}

void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
{
	if ((mdev->ldev->md.flags & flag) != flag) {
		drbd_md_mark_dirty(mdev);
		mdev->ldev->md.flags |= flag;
	}
}

void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
{
	if ((mdev->ldev->md.flags & flag) != 0) {
		drbd_md_mark_dirty(mdev);
		mdev->ldev->md.flags &= ~flag;
	}
}

int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
{
	return (bdev->md.flags & flag) != 0;
}

static void md_sync_timer_fn(unsigned long data)
{
	struct drbd_conf *mdev = (struct drbd_conf *) data;

	drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work);
}

static int w_md_sync(struct drbd_work *w, int unused)
{
	struct drbd_conf *mdev = w->mdev;

	dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
#ifdef DEBUG
	dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
		 mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
#endif
	drbd_md_sync(mdev);
	return 1;
}

const char *cmdname(enum drbd_packet cmd)
{
	/* THINK may need to become several global tables
	 * when we want to support more than
	 * one PRO_VERSION */
	static const char *cmdnames[] = {
		[P_DATA] = "Data",
		[P_DATA_REPLY] = "DataReply",
		[P_RS_DATA_REPLY] = "RSDataReply",
		[P_BARRIER] = "Barrier",
		[P_BITMAP] = "ReportBitMap",
		[P_BECOME_SYNC_TARGET] = "BecomeSyncTarget",
		[P_BECOME_SYNC_SOURCE] = "BecomeSyncSource",
		[P_UNPLUG_REMOTE] = "UnplugRemote",
		[P_DATA_REQUEST] = "DataRequest",
		[P_RS_DATA_REQUEST] = "RSDataRequest",
		[P_SYNC_PARAM] = "SyncParam",
		[P_SYNC_PARAM89] = "SyncParam89",
		[P_PROTOCOL] = "ReportProtocol",
		[P_UUIDS] = "ReportUUIDs",
		[P_SIZES] = "ReportSizes",
		[P_STATE] = "ReportState",
		[P_SYNC_UUID] = "ReportSyncUUID",
		[P_AUTH_CHALLENGE] = "AuthChallenge",
		[P_AUTH_RESPONSE] = "AuthResponse",
		[P_PING] = "Ping",
		[P_PING_ACK] = "PingAck",
		[P_RECV_ACK] = "RecvAck",
		[P_WRITE_ACK] = "WriteAck",
		[P_RS_WRITE_ACK] = "RSWriteAck",
		[P_DISCARD_WRITE] = "DiscardWrite",
		[P_NEG_ACK] = "NegAck",
		[P_NEG_DREPLY] = "NegDReply",
		[P_NEG_RS_DREPLY] = "NegRSDReply",
		[P_BARRIER_ACK] = "BarrierAck",
		[P_STATE_CHG_REQ] = "StateChgRequest",
		[P_STATE_CHG_REPLY] = "StateChgReply",
		[P_OV_REQUEST] = "OVRequest",
		[P_OV_REPLY] = "OVReply",
		[P_OV_RESULT] = "OVResult",
		[P_CSUM_RS_REQUEST] = "CsumRSRequest",
		[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
		[P_COMPRESSED_BITMAP] = "CBitmap",
		[P_DELAY_PROBE] = "DelayProbe",
		[P_OUT_OF_SYNC] = "OutOfSync",
		[P_RETRY_WRITE] = "RetryWrite",
	};

	if (cmd == P_HAND_SHAKE_M)
		return "HandShakeM";
	if (cmd == P_HAND_SHAKE_S)
		return "HandShakeS";
	if (cmd == P_HAND_SHAKE)
		return "HandShake";
	if (cmd >= ARRAY_SIZE(cmdnames))
		return "Unknown";
	return cmdnames[cmd];
}

/**
 * drbd_wait_misc  -  wait for a request to make progress
 * @mdev:	device associated with the request
 * @i:		the struct drbd_interval embedded in struct drbd_request or
 *		struct drbd_peer_request
 */
int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i)
{
	struct net_conf *net_conf = mdev->tconn->net_conf;
	DEFINE_WAIT(wait);
	long timeout;

	if (!net_conf)
		return -ETIMEDOUT;
	timeout = MAX_SCHEDULE_TIMEOUT;
	if (net_conf->ko_count)
		timeout = net_conf->timeout * HZ / 10 * net_conf->ko_count;

	/* Indicate to wake up mdev->misc_wait on progress. */
	i->waiting = true;
	prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE);
	spin_unlock_irq(&mdev->tconn->req_lock);
	timeout = schedule_timeout(timeout);
	finish_wait(&mdev->misc_wait, &wait);
	spin_lock_irq(&mdev->tconn->req_lock);
	if (!timeout || mdev->state.conn < C_CONNECTED)
		return -ETIMEDOUT;
	if (signal_pending(current))
		return -ERESTARTSYS;
	return 0;
}

#ifdef CONFIG_DRBD_FAULT_INJECTION
/* Fault insertion support including random number generator shamelessly
 * stolen from kernel/rcutorture.c */
struct fault_random_state {
	unsigned long state;
	unsigned long count;
};

#define FAULT_RANDOM_MULT 39916801  /* prime */
#define FAULT_RANDOM_ADD 479001701 /* prime */
#define FAULT_RANDOM_REFRESH 10000

/*
 * Crude but fast random-number generator. Uses a linear congruential
 * generator, with occasional help from get_random_bytes().
 */
static unsigned long
_drbd_fault_random(struct fault_random_state *rsp)
{
	long refresh;

	if (!rsp->count--) {
		get_random_bytes(&refresh, sizeof(refresh));
		rsp->state += refresh;
		rsp->count = FAULT_RANDOM_REFRESH;
	}
	rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
	return swahw32(rsp->state);
}
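
/*
 * Stand-alone sketch (not part of the driver): the same linear congruential
 * step can be reproduced in user space to get a feel for the stream that
 * _drbd_insert_fault() samples from below.  This ignores the swahw32()
 * half-word swap and the periodic get_random_bytes() reseed, which only
 * perturb the sequence; fault_rate here is assumed to be 5 (percent).
 *
 *	unsigned long state = 0, hits = 0, n;
 *
 *	for (n = 0; n < 1000000; n++) {
 *		state = state * 39916801UL + 479001701UL;
 *		if ((state % 100) + 1 <= 5)
 *			hits++;
 *	}
 *	// roughly fault_rate percent of the iterations count as hits
 */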

static char *
_drbd_fault_str(unsigned int type) {
	static char *_faults[] = {
		[DRBD_FAULT_MD_WR] = "Meta-data write",
		[DRBD_FAULT_MD_RD] = "Meta-data read",
		[DRBD_FAULT_RS_WR] = "Resync write",
		[DRBD_FAULT_RS_RD] = "Resync read",
		[DRBD_FAULT_DT_WR] = "Data write",
		[DRBD_FAULT_DT_RD] = "Data read",
		[DRBD_FAULT_DT_RA] = "Data read ahead",
		[DRBD_FAULT_BM_ALLOC] = "BM allocation",
		[DRBD_FAULT_AL_EE] = "EE allocation",
		[DRBD_FAULT_RECEIVE] = "receive data corruption",
	};

	return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
}

unsigned int
_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
{
	static struct fault_random_state rrs = {0, 0};

	unsigned int ret = (
		(fault_devs == 0 ||
			((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
		(((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));

	if (ret) {
		fault_count++;

		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "***Simulating %s failure\n",
				_drbd_fault_str(type));
	}

	return ret;
}
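
/*
 * Illustrative note (it is an assumption of this note that fault_rate and
 * fault_devs are exposed as module parameters of the same name; check the
 * declarations near the top of this file): with CONFIG_DRBD_FAULT_INJECTION
 * enabled, fault_rate is a percentage and fault_devs a bitmask of minors,
 * as the expression in _drbd_insert_fault() above shows.  A load-time
 * request such as
 *
 *	# modprobe drbd fault_rate=2 fault_devs=1
 *
 * would ask for roughly 2% simulated failures on minor 0 only, for whichever
 * fault types are enabled.
 */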
#endif

const char *drbd_buildtag(void)
{
	/* DRBD built from external sources has here a reference to the
	   git hash of the source code. */

	static char buildtag[38] = "\0uilt-in";

	if (buildtag[0] == 0) {
#ifdef CONFIG_MODULES
		if (THIS_MODULE != NULL)
			sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
		else
#endif
			buildtag[0] = 'b';
	}

	return buildtag;
}

module_init(drbd_init)
module_exit(drbd_cleanup)

EXPORT_SYMBOL(drbd_conn_str);
EXPORT_SYMBOL(drbd_role_str);
EXPORT_SYMBOL(drbd_disk_str);
EXPORT_SYMBOL(drbd_set_st_err_str);