blob: c8d173c1139006a100d1e9723f812dcd49a32e14 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
/*
 * Bundles the three values describing one packet: its command, a size
 * and a vnr.
 * NOTE(review): presumably filled in while decoding a received packet
 * header, with size being the payload length and vnr the volume number;
 * the users are outside this part of the file - confirm there.
 */
struct packet_info {
	enum drbd_packet cmd;
	int size;
	int vnr;
};
56
/*
 * Result type of drbd_may_finish_epoch() (declared below).
 * NOTE(review): judging by the names only, the values report whether the
 * epoch is still in use, was destroyed, or was recycled - confirm against
 * the implementation of drbd_may_finish_epoch().
 */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
62
Philipp Reisner65d11ed2011-02-07 17:35:59 +010063static int drbd_do_handshake(struct drbd_tconn *tconn);
Philipp Reisner13e60372011-02-08 09:54:40 +010064static int drbd_do_auth(struct drbd_tconn *tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -070065
66static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
67static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
68
Philipp Reisnerb411b362009-09-25 16:07:19 -070069
70#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
71
/*
 * Some helper functions to deal with singly linked page lists,
 * page->private being our "next" pointer.
 */
76
77/* If at least n pages are linked at head, get n pages off.
78 * Otherwise, don't modify head, and return NULL.
79 * Locking is the responsibility of the caller.
80 */
81static struct page *page_chain_del(struct page **head, int n)
82{
83 struct page *page;
84 struct page *tmp;
85
86 BUG_ON(!n);
87 BUG_ON(!head);
88
89 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020090
91 if (!page)
92 return NULL;
93
Lars Ellenberg45bb9122010-05-14 17:10:48 +020094 while (page) {
95 tmp = page_chain_next(page);
96 if (--n == 0)
97 break; /* found sufficient pages */
98 if (tmp == NULL)
99 /* insufficient pages, don't use any of them. */
100 return NULL;
101 page = tmp;
102 }
103
104 /* add end of list marker for the returned list */
105 set_page_private(page, 0);
106 /* actual return value, and adjustment of head */
107 page = *head;
108 *head = tmp;
109 return page;
110}
111
112/* may be used outside of locks to find the tail of a (usually short)
113 * "private" page chain, before adding it back to a global chain head
114 * with page_chain_add() under a spinlock. */
115static struct page *page_chain_tail(struct page *page, int *len)
116{
117 struct page *tmp;
118 int i = 1;
119 while ((tmp = page_chain_next(page)))
120 ++i, page = tmp;
121 if (len)
122 *len = i;
123 return page;
124}
125
126static int page_chain_free(struct page *page)
127{
128 struct page *tmp;
129 int i = 0;
130 page_chain_for_each_safe(page, tmp) {
131 put_page(page);
132 ++i;
133 }
134 return i;
135}
136
137static void page_chain_add(struct page **head,
138 struct page *chain_first, struct page *chain_last)
139{
140#if 1
141 struct page *tmp;
142 tmp = page_chain_tail(chain_first, NULL);
143 BUG_ON(tmp != chain_last);
144#endif
145
146 /* add chain to head */
147 set_page_private(chain_last, (unsigned long)*head);
148 *head = chain_first;
149}
150
151static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700152{
153 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200154 struct page *tmp = NULL;
155 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700156
157 /* Yes, testing drbd_pp_vacant outside the lock is racy.
158 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200159 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700160 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200161 page = page_chain_del(&drbd_pp_pool, number);
162 if (page)
163 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700164 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200165 if (page)
166 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700167 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200168
Philipp Reisnerb411b362009-09-25 16:07:19 -0700169 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
170 * "criss-cross" setup, that might cause write-out on some other DRBD,
171 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200172 for (i = 0; i < number; i++) {
173 tmp = alloc_page(GFP_TRY);
174 if (!tmp)
175 break;
176 set_page_private(tmp, (unsigned long)page);
177 page = tmp;
178 }
179
180 if (i == number)
181 return page;
182
183 /* Not enough pages immediately available this time.
184 * No need to jump around here, drbd_pp_alloc will retry this
185 * function "soon". */
186 if (page) {
187 tmp = page_chain_tail(page, NULL);
188 spin_lock(&drbd_pp_lock);
189 page_chain_add(&drbd_pp_pool, page, tmp);
190 drbd_pp_vacant += i;
191 spin_unlock(&drbd_pp_lock);
192 }
193 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700194}
195
Philipp Reisnerb411b362009-09-25 16:07:19 -0700196static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
197{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100198 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700199 struct list_head *le, *tle;
200
201 /* The EEs are always appended to the end of the list. Since
202 they are sent in order over the wire, they have to finish
203 in order. As soon as we see the first not finished we can
204 stop to examine the list... */
205
206 list_for_each_safe(le, tle, &mdev->net_ee) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100207 peer_req = list_entry(le, struct drbd_peer_request, w.list);
208 if (drbd_ee_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700209 break;
210 list_move(le, to_be_freed);
211 }
212}
213
214static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
215{
216 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100217 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700218
Philipp Reisner87eeee42011-01-19 14:16:30 +0100219 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700220 reclaim_net_ee(mdev, &reclaimed);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100221 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700222
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100223 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
224 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700225}
226
227/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200228 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700229 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200230 * @number: number of pages requested
231 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700232 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200233 * Tries to allocate number pages, first from our own page pool, then from
234 * the kernel, unless this allocation would exceed the max_buffers setting.
235 * Possibly retry until DRBD frees sufficient pages somewhere else.
236 *
237 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700238 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200239static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700240{
241 struct page *page = NULL;
242 DEFINE_WAIT(wait);
243
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200244 /* Yes, we may run up to @number over max_buffers. If we
245 * follow it strictly, the admin will get it wrong anyways. */
Philipp Reisner89e58e72011-01-19 13:12:45 +0100246 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200247 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700248
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200249 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700250 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
251
252 drbd_kick_lo_and_reclaim_net(mdev);
253
Philipp Reisner89e58e72011-01-19 13:12:45 +0100254 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200255 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700256 if (page)
257 break;
258 }
259
260 if (!retry)
261 break;
262
263 if (signal_pending(current)) {
264 dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
265 break;
266 }
267
268 schedule();
269 }
270 finish_wait(&drbd_pp_wait, &wait);
271
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200272 if (page)
273 atomic_add(number, &mdev->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700274 return page;
275}
276
277/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
Philipp Reisner87eeee42011-01-19 14:16:30 +0100278 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200279 * Either links the page chain back to the global pool,
280 * or returns all pages to the system. */
Lars Ellenberg435f0742010-09-06 12:30:25 +0200281static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700282{
Lars Ellenberg435f0742010-09-06 12:30:25 +0200283 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700284 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200285
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100286 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200287 i = page_chain_free(page);
288 else {
289 struct page *tmp;
290 tmp = page_chain_tail(page, &i);
291 spin_lock(&drbd_pp_lock);
292 page_chain_add(&drbd_pp_pool, page, tmp);
293 drbd_pp_vacant += i;
294 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700295 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200296 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200297 if (i < 0)
Lars Ellenberg435f0742010-09-06 12:30:25 +0200298 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
299 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700300 wake_up(&drbd_pp_wait);
301}
302
303/*
304You need to hold the req_lock:
305 _drbd_wait_ee_list_empty()
306
307You must not have the req_lock:
308 drbd_free_ee()
309 drbd_alloc_ee()
310 drbd_init_ee()
311 drbd_release_ee()
312 drbd_ee_fix_bhs()
313 drbd_process_done_ee()
314 drbd_clear_done_ee()
315 drbd_wait_ee_list_empty()
316*/
317
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100318struct drbd_peer_request *
319drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
320 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700321{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100322 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700323 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200324 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700325
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +0100326 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700327 return NULL;
328
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100329 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
330 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700331 if (!(gfp_mask & __GFP_NOWARN))
332 dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
333 return NULL;
334 }
335
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200336 page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
337 if (!page)
338 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700339
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100340 drbd_clear_interval(&peer_req->i);
341 peer_req->i.size = data_size;
342 peer_req->i.sector = sector;
343 peer_req->i.local = false;
344 peer_req->i.waiting = false;
Andreas Gruenbacher53840642011-01-28 10:31:04 +0100345
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100346 peer_req->epoch = NULL;
347 peer_req->mdev = mdev;
348 peer_req->pages = page;
349 atomic_set(&peer_req->pending_bios, 0);
350 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100351 /*
352 * The block_id is opaque to the receiver. It is not endianness
353 * converted, and sent back to the sender unchanged.
354 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100355 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700356
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100357 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700358
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200359 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100360 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700361 return NULL;
362}
363
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100364void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100365 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700366{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100367 if (peer_req->flags & EE_HAS_DIGEST)
368 kfree(peer_req->digest);
369 drbd_pp_free(mdev, peer_req->pages, is_net);
370 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
371 D_ASSERT(drbd_interval_empty(&peer_req->i));
372 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700373}
374
375int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
376{
377 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100378 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700379 int count = 0;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200380 int is_net = list == &mdev->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700381
Philipp Reisner87eeee42011-01-19 14:16:30 +0100382 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700383 list_splice_init(list, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100384 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700385
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100386 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
387 drbd_free_some_ee(mdev, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700388 count++;
389 }
390 return count;
391}
392
393
394/*
395 * This function is called from _asender only_
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100396 * but see also comments in _req_mod(,BARRIER_ACKED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700397 * and receive_Barrier.
398 *
399 * Move entries from net_ee to done_ee, if ready.
400 * Grab done_ee, call all callbacks, free the entries.
401 * The callbacks typically send out ACKs.
402 */
403static int drbd_process_done_ee(struct drbd_conf *mdev)
404{
405 LIST_HEAD(work_list);
406 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100407 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700408 int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);
409
Philipp Reisner87eeee42011-01-19 14:16:30 +0100410 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700411 reclaim_net_ee(mdev, &reclaimed);
412 list_splice_init(&mdev->done_ee, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100413 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700414
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100415 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
416 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700417
418 /* possible callbacks here:
419 * e_end_block, and e_end_resync_block, e_send_discard_ack.
420 * all ignore the last argument.
421 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100422 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700423 /* list_del not necessary, next/prev members not touched */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100424 ok = peer_req->w.cb(mdev, &peer_req->w, !ok) && ok;
425 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700426 }
427 wake_up(&mdev->ee_wait);
428
429 return ok;
430}
431
432void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
433{
434 DEFINE_WAIT(wait);
435
436 /* avoids spin_lock/unlock
437 * and calling prepare_to_wait in the fast path */
438 while (!list_empty(head)) {
439 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100440 spin_unlock_irq(&mdev->tconn->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100441 io_schedule();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700442 finish_wait(&mdev->ee_wait, &wait);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100443 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700444 }
445}
446
/*
 * Wait until @head is an empty list.
 * Takes the req_lock itself around _drbd_wait_ee_list_empty(), so it must
 * be called without the req_lock held.
 */
void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}
453
454/* see also kernel_accept; which is only present since 2.6.18.
455 * also we want to log which part of it failed, exactly */
Philipp Reisner76536202011-02-07 14:09:54 +0100456static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700457{
458 struct sock *sk = sock->sk;
459 int err = 0;
460
461 *what = "listen";
462 err = sock->ops->listen(sock, 5);
463 if (err < 0)
464 goto out;
465
466 *what = "sock_create_lite";
467 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
468 newsock);
469 if (err < 0)
470 goto out;
471
472 *what = "accept";
473 err = sock->ops->accept(sock, *newsock, 0);
474 if (err < 0) {
475 sock_release(*newsock);
476 *newsock = NULL;
477 goto out;
478 }
479 (*newsock)->ops = sock->ops;
480
481out:
482 return err;
483}
484
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100485static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700486{
487 mm_segment_t oldfs;
488 struct kvec iov = {
489 .iov_base = buf,
490 .iov_len = size,
491 };
492 struct msghdr msg = {
493 .msg_iovlen = 1,
494 .msg_iov = (struct iovec *)&iov,
495 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
496 };
497 int rv;
498
499 oldfs = get_fs();
500 set_fs(KERNEL_DS);
501 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
502 set_fs(oldfs);
503
504 return rv;
505}
506
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100507static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700508{
509 mm_segment_t oldfs;
510 struct kvec iov = {
511 .iov_base = buf,
512 .iov_len = size,
513 };
514 struct msghdr msg = {
515 .msg_iovlen = 1,
516 .msg_iov = (struct iovec *)&iov,
517 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
518 };
519 int rv;
520
521 oldfs = get_fs();
522 set_fs(KERNEL_DS);
523
524 for (;;) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100525 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700526 if (rv == size)
527 break;
528
529 /* Note:
530 * ECONNRESET other side closed the connection
531 * ERESTARTSYS (on sock) we got a signal
532 */
533
534 if (rv < 0) {
535 if (rv == -ECONNRESET)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100536 conn_info(tconn, "sock was reset by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700537 else if (rv != -ERESTARTSYS)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100538 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700539 break;
540 } else if (rv == 0) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100541 conn_info(tconn, "sock was shut down by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700542 break;
543 } else {
544 /* signal came in, or peer/link went down,
545 * after we read a partial message
546 */
547 /* D_ASSERT(signal_pending(current)); */
548 break;
549 }
550 };
551
552 set_fs(oldfs);
553
554 if (rv != size)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100555 drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700556
557 return rv;
558}
559
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200560/* quoting tcp(7):
561 * On individual connections, the socket buffer size must be set prior to the
562 * listen(2) or connect(2) calls in order to have it take effect.
563 * This is our wrapper to do so.
564 */
565static void drbd_setbufsize(struct socket *sock, unsigned int snd,
566 unsigned int rcv)
567{
568 /* open coded SO_SNDBUF, SO_RCVBUF */
569 if (snd) {
570 sock->sk->sk_sndbuf = snd;
571 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
572 }
573 if (rcv) {
574 sock->sk->sk_rcvbuf = rcv;
575 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
576 }
577}
578
/*
 * Try once to actively connect to the configured peer address.
 *
 * Creates a TCP socket, binds it to the configured local address (with
 * the port set to 0) and connects to net_conf->peer_addr. Send and
 * receive timeouts are set to try_connect_int seconds.
 *
 * Returns the connected socket, or NULL. On errors other than the
 * "try again later" kind listed in the switch below, the error is logged;
 * if such an error happened before the connect step, the connection state
 * is additionally forced to C_DISCONNECTING.
 * Holds a net_conf reference for the duration of the call.
 */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	struct sockaddr_in6 src_addr;
	struct socket *sock;
	const char *step;
	int disconnect_on_error = 1;
	int err;

	if (!get_net_conf(tconn))
		return NULL;

	step = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	sock->sk->sk_rcvtimeo = sock->sk->sk_sndtimeo;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_addr, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_addr)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_addr.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_addr)->sin_port = 0; /* AF_INET & AF_SCI */

	step = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *)&src_addr,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	step = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}

		switch (-err) {
		/* timeout, busy, signal pending */
		case ETIMEDOUT:
		case EAGAIN:
		case EINPROGRESS:
		case EINTR:
		case ERESTARTSYS:
		/* peer not (yet) available, network problem */
		case ECONNREFUSED:
		case ENETUNREACH:
		case EHOSTDOWN:
		case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", step, err);
			break;
		}

		if (disconnect_on_error)
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
	}

	put_net_conf(tconn);
	return sock;
}
656
Philipp Reisner76536202011-02-07 14:09:54 +0100657static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700658{
659 int timeo, err;
660 struct socket *s_estab = NULL, *s_listen;
661 const char *what;
662
Philipp Reisner76536202011-02-07 14:09:54 +0100663 if (!get_net_conf(tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700664 return NULL;
665
666 what = "sock_create_kern";
Philipp Reisner76536202011-02-07 14:09:54 +0100667 err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700668 SOCK_STREAM, IPPROTO_TCP, &s_listen);
669 if (err) {
670 s_listen = NULL;
671 goto out;
672 }
673
Philipp Reisner76536202011-02-07 14:09:54 +0100674 timeo = tconn->net_conf->try_connect_int * HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700675 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
676
677 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
678 s_listen->sk->sk_rcvtimeo = timeo;
679 s_listen->sk->sk_sndtimeo = timeo;
Philipp Reisner76536202011-02-07 14:09:54 +0100680 drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
681 tconn->net_conf->rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700682
683 what = "bind before listen";
684 err = s_listen->ops->bind(s_listen,
Philipp Reisner76536202011-02-07 14:09:54 +0100685 (struct sockaddr *) tconn->net_conf->my_addr,
686 tconn->net_conf->my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700687 if (err < 0)
688 goto out;
689
Philipp Reisner76536202011-02-07 14:09:54 +0100690 err = drbd_accept(&what, s_listen, &s_estab);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700691
692out:
693 if (s_listen)
694 sock_release(s_listen);
695 if (err < 0) {
696 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Philipp Reisner76536202011-02-07 14:09:54 +0100697 conn_err(tconn, "%s failed, err = %d\n", what, err);
698 drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700699 }
700 }
Philipp Reisner76536202011-02-07 14:09:54 +0100701 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700702
703 return s_estab;
704}
705
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100706static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700707{
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100708 struct p_header *h = &tconn->data.sbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700709
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100710 return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700711}
712
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100713static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700714{
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100715 struct p_header80 *h = &tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700716 int rr;
717
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100718 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700719
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100720 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700721 return be16_to_cpu(h->command);
722
723 return 0xffff;
724}
725
726/**
727 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700728 * @sock: pointer to the pointer to the socket.
729 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100730static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700731{
732 int rr;
733 char tb[4];
734
735 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100736 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700737
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100738 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700739
740 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100741 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700742 } else {
743 sock_release(*sock);
744 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100745 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700746 }
747}
748
Philipp Reisner907599e2011-02-08 11:25:37 +0100749static int drbd_connected(int vnr, void *p, void *data)
750{
751 struct drbd_conf *mdev = (struct drbd_conf *)p;
752 int ok = 1;
753
754 atomic_set(&mdev->packet_seq, 0);
755 mdev->peer_seq = 0;
756
757 ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
758 ok &= drbd_send_sizes(mdev, 0, 0);
759 ok &= drbd_send_uuids(mdev);
760 ok &= drbd_send_state(mdev);
761 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
762 clear_bit(RESIZE_PENDING, &mdev->flags);
763
764 return !ok;
765}
766
Philipp Reisnerb411b362009-09-25 16:07:19 -0700767/*
768 * return values:
769 * 1 yes, we have a valid connection
770 * 0 oops, did not work out, please try again
771 * -1 peer talks different language,
772 * no point in trying again, please go standalone.
773 * -2 We do not have a network config...
774 */
Philipp Reisner907599e2011-02-08 11:25:37 +0100775static int drbd_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700776{
777 struct socket *s, *sock, *msock;
778 int try, h, ok;
779
Philipp Reisner907599e2011-02-08 11:25:37 +0100780 if (drbd_request_state(tconn->volume0, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700781 return -2;
782
Philipp Reisner907599e2011-02-08 11:25:37 +0100783 clear_bit(DISCARD_CONCURRENT, &tconn->flags);
784 tconn->agreed_pro_version = 99;
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100785 /* agreed_pro_version must be smaller than 100 so we send the old
786 header (h80) in the first packet and in the handshake packet. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700787
788 sock = NULL;
789 msock = NULL;
790
791 do {
792 for (try = 0;;) {
793 /* 3 tries, this should take less than a second! */
Philipp Reisner907599e2011-02-08 11:25:37 +0100794 s = drbd_try_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700795 if (s || ++try >= 3)
796 break;
797 /* give the other side time to call bind() & listen() */
Philipp Reisner20ee6392011-01-18 15:28:59 +0100798 schedule_timeout_interruptible(HZ / 10);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700799 }
800
801 if (s) {
802 if (!sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100803 drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700804 sock = s;
805 s = NULL;
806 } else if (!msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100807 drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700808 msock = s;
809 s = NULL;
810 } else {
Philipp Reisner907599e2011-02-08 11:25:37 +0100811 conn_err(tconn, "Logic error in drbd_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700812 goto out_release_sockets;
813 }
814 }
815
816 if (sock && msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100817 schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100818 ok = drbd_socket_okay(&sock);
819 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700820 if (ok)
821 break;
822 }
823
824retry:
Philipp Reisner907599e2011-02-08 11:25:37 +0100825 s = drbd_wait_for_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700826 if (s) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100827 try = drbd_recv_fp(tconn, s);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100828 drbd_socket_okay(&sock);
829 drbd_socket_okay(&msock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700830 switch (try) {
831 case P_HAND_SHAKE_S:
832 if (sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100833 conn_warn(tconn, "initial packet S crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700834 sock_release(sock);
835 }
836 sock = s;
837 break;
838 case P_HAND_SHAKE_M:
839 if (msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100840 conn_warn(tconn, "initial packet M crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700841 sock_release(msock);
842 }
843 msock = s;
Philipp Reisner907599e2011-02-08 11:25:37 +0100844 set_bit(DISCARD_CONCURRENT, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700845 break;
846 default:
Philipp Reisner907599e2011-02-08 11:25:37 +0100847 conn_warn(tconn, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700848 sock_release(s);
849 if (random32() & 1)
850 goto retry;
851 }
852 }
853
Philipp Reisner907599e2011-02-08 11:25:37 +0100854 if (tconn->volume0->state.conn <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700855 goto out_release_sockets;
856 if (signal_pending(current)) {
857 flush_signals(current);
858 smp_rmb();
Philipp Reisner907599e2011-02-08 11:25:37 +0100859 if (get_t_state(&tconn->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700860 goto out_release_sockets;
861 }
862
863 if (sock && msock) {
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100864 ok = drbd_socket_okay(&sock);
865 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700866 if (ok)
867 break;
868 }
869 } while (1);
870
871 msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
872 sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
873
874 sock->sk->sk_allocation = GFP_NOIO;
875 msock->sk->sk_allocation = GFP_NOIO;
876
877 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
878 msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
879
Philipp Reisnerb411b362009-09-25 16:07:19 -0700880 /* NOT YET ...
Philipp Reisner907599e2011-02-08 11:25:37 +0100881 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700882 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
883 * first set it to the P_HAND_SHAKE timeout,
884 * which we set to 4x the configured ping_timeout. */
885 sock->sk->sk_sndtimeo =
Philipp Reisner907599e2011-02-08 11:25:37 +0100886 sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700887
Philipp Reisner907599e2011-02-08 11:25:37 +0100888 msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
889 msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700890
891 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300892 * we use TCP_CORK where appropriate, though */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700893 drbd_tcp_nodelay(sock);
894 drbd_tcp_nodelay(msock);
895
Philipp Reisner907599e2011-02-08 11:25:37 +0100896 tconn->data.socket = sock;
897 tconn->meta.socket = msock;
898 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700899
Philipp Reisner907599e2011-02-08 11:25:37 +0100900 h = drbd_do_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700901 if (h <= 0)
902 return h;
903
Philipp Reisner907599e2011-02-08 11:25:37 +0100904 if (tconn->cram_hmac_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700905 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
Philipp Reisner907599e2011-02-08 11:25:37 +0100906 switch (drbd_do_auth(tconn)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +0100907 case -1:
Philipp Reisner907599e2011-02-08 11:25:37 +0100908 conn_err(tconn, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700909 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +0100910 case 0:
Philipp Reisner907599e2011-02-08 11:25:37 +0100911 conn_err(tconn, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +0100912 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700913 }
914 }
915
Philipp Reisner907599e2011-02-08 11:25:37 +0100916 if (drbd_request_state(tconn->volume0, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700917 return 0;
918
Philipp Reisner907599e2011-02-08 11:25:37 +0100919 sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700920 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
921
Philipp Reisner907599e2011-02-08 11:25:37 +0100922 drbd_thread_start(&tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700923
Philipp Reisner907599e2011-02-08 11:25:37 +0100924 if (drbd_send_protocol(tconn) == -1)
Philipp Reisner7e2455c2010-04-22 14:50:23 +0200925 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700926
Philipp Reisner907599e2011-02-08 11:25:37 +0100927 return !idr_for_each(&tconn->volumes, drbd_connected, tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700928
929out_release_sockets:
930 if (sock)
931 sock_release(sock);
932 if (msock)
933 sock_release(msock);
934 return -1;
935}
936
Philipp Reisnerce243852011-02-07 17:27:47 +0100937static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700938{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100939 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100940 pi->cmd = be16_to_cpu(h->h80.command);
941 pi->size = be16_to_cpu(h->h80.length);
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100942 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100943 pi->cmd = be16_to_cpu(h->h95.command);
944 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
945 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200946 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100947 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200948 be32_to_cpu(h->h80.magic),
949 be16_to_cpu(h->h80.command),
950 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100951 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700952 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100953 return true;
954}
955
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100956static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100957{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100958 struct p_header *h = &tconn->data.rbuf.header;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100959 int r;
960
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100961 r = drbd_recv(tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100962 if (unlikely(r != sizeof(*h))) {
963 if (!signal_pending(current))
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100964 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
Philipp Reisner257d0af2011-01-26 12:15:29 +0100965 return false;
966 }
967
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100968 r = decode_header(tconn, h, pi);
969 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700970
Philipp Reisner257d0af2011-01-26 12:15:29 +0100971 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700972}
973
Philipp Reisner2451fc32010-08-24 13:43:11 +0200974static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700975{
976 int rv;
977
978 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400979 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200980 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700981 if (rv) {
982 dev_err(DEV, "local disk flush failed with status %d\n", rv);
983 /* would rather check on EOPNOTSUPP, but that is not reliable.
984 * don't try again for ANY return value != 0
985 * if (rv == -EOPNOTSUPP) */
986 drbd_bump_write_ordering(mdev, WO_drain_io);
987 }
988 put_ldev(mdev);
989 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700990}
991
992/**
993 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
994 * @mdev: DRBD device.
995 * @epoch: Epoch object.
996 * @ev: Epoch event.
997 */
998static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
999 struct drbd_epoch *epoch,
1000 enum epoch_event ev)
1001{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001002 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001003 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001004 enum finish_epoch rv = FE_STILL_LIVE;
1005
1006 spin_lock(&mdev->epoch_lock);
1007 do {
1008 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001009
1010 epoch_size = atomic_read(&epoch->epoch_size);
1011
1012 switch (ev & ~EV_CLEANUP) {
1013 case EV_PUT:
1014 atomic_dec(&epoch->active);
1015 break;
1016 case EV_GOT_BARRIER_NR:
1017 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001018 break;
1019 case EV_BECAME_LAST:
1020 /* nothing to do*/
1021 break;
1022 }
1023
Philipp Reisnerb411b362009-09-25 16:07:19 -07001024 if (epoch_size != 0 &&
1025 atomic_read(&epoch->active) == 0 &&
Philipp Reisner2451fc32010-08-24 13:43:11 +02001026 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001027 if (!(ev & EV_CLEANUP)) {
1028 spin_unlock(&mdev->epoch_lock);
1029 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
1030 spin_lock(&mdev->epoch_lock);
1031 }
1032 dec_unacked(mdev);
1033
1034 if (mdev->current_epoch != epoch) {
1035 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1036 list_del(&epoch->list);
1037 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1038 mdev->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001039 kfree(epoch);
1040
1041 if (rv == FE_STILL_LIVE)
1042 rv = FE_DESTROYED;
1043 } else {
1044 epoch->flags = 0;
1045 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001046 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001047 if (rv == FE_STILL_LIVE)
1048 rv = FE_RECYCLED;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001049 wake_up(&mdev->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001050 }
1051 }
1052
1053 if (!next_epoch)
1054 break;
1055
1056 epoch = next_epoch;
1057 } while (1);
1058
1059 spin_unlock(&mdev->epoch_lock);
1060
Philipp Reisnerb411b362009-09-25 16:07:19 -07001061 return rv;
1062}
1063
1064/**
1065 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1066 * @mdev: DRBD device.
1067 * @wo: Write ordering method to try.
1068 */
1069void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1070{
1071 enum write_ordering_e pwo;
1072 static char *write_ordering_str[] = {
1073 [WO_none] = "none",
1074 [WO_drain_io] = "drain",
1075 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001076 };
1077
1078 pwo = mdev->write_ordering;
1079 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001080 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1081 wo = WO_drain_io;
1082 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1083 wo = WO_none;
1084 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001085 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001086 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1087}
1088
1089/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001090 * drbd_submit_ee()
1091 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001092 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001093 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001094 *
1095 * May spread the pages to multiple bios,
1096 * depending on bio_add_page restrictions.
1097 *
1098 * Returns 0 if all bios have been submitted,
1099 * -ENOMEM if we could not allocate enough bios,
1100 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1101 * single page to an empty bio (which should never happen and likely indicates
1102 * that the lower level IO stack is in some way broken). This has been observed
1103 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001104 */
1105/* TODO allocate from our own bio_set. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001106int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001107 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001108{
1109 struct bio *bios = NULL;
1110 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001111 struct page *page = peer_req->pages;
1112 sector_t sector = peer_req->i.sector;
1113 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001114 unsigned n_bios = 0;
1115 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001116 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001117
1118 /* In most cases, we will only need one bio. But in case the lower
1119 * level restrictions happen to be different at this offset on this
1120 * side than those of the sending peer, we may need to submit the
1121 * request in more than one bio. */
1122next_bio:
1123 bio = bio_alloc(GFP_NOIO, nr_pages);
1124 if (!bio) {
1125 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1126 goto fail;
1127 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001128 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001129 bio->bi_sector = sector;
1130 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001131 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001132 bio->bi_private = peer_req;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001133 bio->bi_end_io = drbd_endio_sec;
1134
1135 bio->bi_next = bios;
1136 bios = bio;
1137 ++n_bios;
1138
1139 page_chain_for_each(page) {
1140 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1141 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001142 /* A single page must always be possible!
1143 * But in case it fails anyways,
1144 * we deal with it, and complain (below). */
1145 if (bio->bi_vcnt == 0) {
1146 dev_err(DEV,
1147 "bio_add_page failed for len=%u, "
1148 "bi_vcnt=0 (bi_sector=%llu)\n",
1149 len, (unsigned long long)bio->bi_sector);
1150 err = -ENOSPC;
1151 goto fail;
1152 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001153 goto next_bio;
1154 }
1155 ds -= len;
1156 sector += len >> 9;
1157 --nr_pages;
1158 }
1159 D_ASSERT(page == NULL);
1160 D_ASSERT(ds == 0);
1161
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001162 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001163 do {
1164 bio = bios;
1165 bios = bios->bi_next;
1166 bio->bi_next = NULL;
1167
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001168 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001169 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001170 return 0;
1171
1172fail:
1173 while (bios) {
1174 bio = bios;
1175 bios = bios->bi_next;
1176 bio_put(bio);
1177 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001178 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001179}
1180
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001181static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001182 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001183{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001184 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001185
1186 drbd_remove_interval(&mdev->write_requests, i);
1187 drbd_clear_interval(i);
1188
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001189 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001190 if (i->waiting)
1191 wake_up(&mdev->misc_wait);
1192}
1193
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001194static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
1195 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001197 int rv;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001198 struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199 struct drbd_epoch *epoch;
1200
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201 inc_unacked(mdev);
1202
Philipp Reisnerb411b362009-09-25 16:07:19 -07001203 mdev->current_epoch->barrier_nr = p->barrier;
1204 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1205
1206 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1207 * the activity log, which means it would not be resynced in case the
1208 * R_PRIMARY crashes now.
1209 * Therefore we must send the barrier_ack after the barrier request was
1210 * completed. */
1211 switch (mdev->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001212 case WO_none:
1213 if (rv == FE_RECYCLED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001214 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001215
1216 /* receiver context, in the writeout path of the other node.
1217 * avoid potential distributed deadlock */
1218 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1219 if (epoch)
1220 break;
1221 else
1222 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1223 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001224
1225 case WO_bdev_flush:
1226 case WO_drain_io:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001227 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001228 drbd_flush(mdev);
1229
1230 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1231 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1232 if (epoch)
1233 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001234 }
1235
Philipp Reisner2451fc32010-08-24 13:43:11 +02001236 epoch = mdev->current_epoch;
1237 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1238
1239 D_ASSERT(atomic_read(&epoch->active) == 0);
1240 D_ASSERT(epoch->flags == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001241
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001242 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001243 default:
1244 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001245 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001246 }
1247
1248 epoch->flags = 0;
1249 atomic_set(&epoch->epoch_size, 0);
1250 atomic_set(&epoch->active, 0);
1251
1252 spin_lock(&mdev->epoch_lock);
1253 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1254 list_add(&epoch->list, &mdev->current_epoch->list);
1255 mdev->current_epoch = epoch;
1256 mdev->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001257 } else {
1258 /* The current_epoch got recycled while we allocated this one... */
1259 kfree(epoch);
1260 }
1261 spin_unlock(&mdev->epoch_lock);
1262
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001263 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001264}
1265
1266/* used from receive_RSDataReply (recv_resync_read)
1267 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001268static struct drbd_peer_request *
1269read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1270 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271{
Lars Ellenberg66660322010-04-06 12:15:04 +02001272 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001273 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001274 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001275 int dgs, ds, rr;
Philipp Reisnera0638452011-01-19 14:31:32 +01001276 void *dig_in = mdev->tconn->int_dig_in;
1277 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001278 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001279
Philipp Reisnera0638452011-01-19 14:31:32 +01001280 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1281 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001282
1283 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001284 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001285 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001286 if (!signal_pending(current))
1287 dev_warn(DEV,
1288 "short read receiving data digest: read %d expected %d\n",
1289 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001290 return NULL;
1291 }
1292 }
1293
1294 data_size -= dgs;
1295
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001296 if (!expect(data_size != 0))
1297 return NULL;
1298 if (!expect(IS_ALIGNED(data_size, 512)))
1299 return NULL;
1300 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1301 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001302
Lars Ellenberg66660322010-04-06 12:15:04 +02001303 /* even though we trust out peer,
1304 * we sometimes have to double check. */
1305 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001306 dev_err(DEV, "request from peer beyond end of local disk: "
1307 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001308 (unsigned long long)capacity,
1309 (unsigned long long)sector, data_size);
1310 return NULL;
1311 }
1312
Philipp Reisnerb411b362009-09-25 16:07:19 -07001313 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1314 * "criss-cross" setup, that might cause write-out on some other DRBD,
1315 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001316 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1317 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001318 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001319
Philipp Reisnerb411b362009-09-25 16:07:19 -07001320 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001321 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001322 page_chain_for_each(page) {
1323 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001324 data = kmap(page);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001325 rr = drbd_recv(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001326 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001327 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1328 data[0] = data[0] ^ (unsigned long)-1;
1329 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001330 kunmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001331 if (rr != len) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001332 drbd_free_ee(mdev, peer_req);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001333 if (!signal_pending(current))
1334 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1335 rr, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001336 return NULL;
1337 }
1338 ds -= rr;
1339 }
1340
1341 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001342 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001343 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001344 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1345 (unsigned long long)sector, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001346 drbd_bcast_ee(mdev, "digest failed",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001347 dgs, dig_in, dig_vv, peer_req);
1348 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001349 return NULL;
1350 }
1351 }
1352 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001353 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001354}
1355
1356/* drbd_drain_block() just takes a data block
1357 * out of the socket input buffer, and discards it.
1358 */
1359static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1360{
1361 struct page *page;
1362 int rr, rv = 1;
1363 void *data;
1364
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001365 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001366 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001367
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001368 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001369
1370 data = kmap(page);
1371 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001372 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001373 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1374 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001375 if (!signal_pending(current))
1376 dev_warn(DEV,
1377 "short read receiving data: read %d expected %d\n",
1378 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001379 break;
1380 }
1381 data_size -= rr;
1382 }
1383 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001384 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385 return rv;
1386}
1387
1388static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1389 sector_t sector, int data_size)
1390{
1391 struct bio_vec *bvec;
1392 struct bio *bio;
1393 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001394 void *dig_in = mdev->tconn->int_dig_in;
1395 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001396
Philipp Reisnera0638452011-01-19 14:31:32 +01001397 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1398 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001399
1400 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001401 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001402 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001403 if (!signal_pending(current))
1404 dev_warn(DEV,
1405 "short read receiving data reply digest: read %d expected %d\n",
1406 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001407 return 0;
1408 }
1409 }
1410
1411 data_size -= dgs;
1412
1413 /* optimistically update recv_cnt. if receiving fails below,
1414 * we disconnect anyways, and counters will be reset. */
1415 mdev->recv_cnt += data_size>>9;
1416
1417 bio = req->master_bio;
1418 D_ASSERT(sector == bio->bi_sector);
1419
1420 bio_for_each_segment(bvec, bio, i) {
1421 expect = min_t(int, data_size, bvec->bv_len);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001422 rr = drbd_recv(mdev->tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001423 kmap(bvec->bv_page)+bvec->bv_offset,
1424 expect);
1425 kunmap(bvec->bv_page);
1426 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001427 if (!signal_pending(current))
1428 dev_warn(DEV, "short read receiving data reply: "
1429 "read %d expected %d\n",
1430 rr, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001431 return 0;
1432 }
1433 data_size -= rr;
1434 }
1435
1436 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001437 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001438 if (memcmp(dig_in, dig_vv, dgs)) {
1439 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1440 return 0;
1441 }
1442 }
1443
1444 D_ASSERT(data_size == 0);
1445 return 1;
1446}
1447
1448/* e_end_resync_block() is called via
1449 * drbd_process_done_ee() by asender only */
1450static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1451{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001452 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1453 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001454 int ok;
1455
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001456 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001458 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1459 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1460 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001461 } else {
1462 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001463 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001465 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466 }
1467 dec_unacked(mdev);
1468
1469 return ok;
1470}
1471
1472static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1473{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001474 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001476 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1477 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001478 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479
1480 dec_rs_pending(mdev);
1481
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482 inc_unacked(mdev);
1483 /* corresponding dec_unacked() in e_end_resync_block()
1484 * respective _drbd_clear_done_ee */
1485
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001486 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001487
Philipp Reisner87eeee42011-01-19 14:16:30 +01001488 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001489 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001490 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001491
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001492 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001493 if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001494 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001496 /* don't care for the reason here */
1497 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001498 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001499 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001500 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001501
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001502 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001503fail:
1504 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001505 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001506}
1507
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001508static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001509find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1510 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001511{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001512 struct drbd_request *req;
1513
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001514 /* Request object according to our peer */
1515 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001516 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001517 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001518 if (!missing_ok) {
1519 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1520 (unsigned long)id, (unsigned long long)sector);
1521 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001522 return NULL;
1523}
1524
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001525static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1526 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001527{
1528 struct drbd_request *req;
1529 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001530 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001531 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001532
1533 sector = be64_to_cpu(p->sector);
1534
Philipp Reisner87eeee42011-01-19 14:16:30 +01001535 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001536 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001537 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001538 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001539 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001540
Bart Van Assche24c48302011-05-21 18:32:29 +02001541 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001542 * special casing it there for the various failure cases.
1543 * still no race with drbd_fail_pending_reads */
1544 ok = recv_dless_read(mdev, req, sector, data_size);
1545
1546 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001547 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548 /* else: nothing. handled from drbd_disconnect...
1549 * I don't think we may complete this just yet
1550 * in case we are "on-disconnect: freeze" */
1551
1552 return ok;
1553}
1554
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001555static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1556 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557{
1558 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001560 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001561
1562 sector = be64_to_cpu(p->sector);
1563 D_ASSERT(p->block_id == ID_SYNCER);
1564
1565 if (get_ldev(mdev)) {
1566 /* data is submitted to disk within recv_resync_read.
1567 * corresponding put_ldev done below on error,
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001568 * or in drbd_endio_sec. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001569 ok = recv_resync_read(mdev, sector, data_size);
1570 } else {
1571 if (__ratelimit(&drbd_ratelimit_state))
1572 dev_err(DEV, "Can not write resync data to local disk.\n");
1573
1574 ok = drbd_drain_block(mdev, data_size);
1575
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001576 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577 }
1578
Philipp Reisner778f2712010-07-06 11:14:00 +02001579 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1580
Philipp Reisnerb411b362009-09-25 16:07:19 -07001581 return ok;
1582}
1583
1584/* e_end_block() is called via drbd_process_done_ee().
1585 * this means this function only runs in the asender thread
1586 */
1587static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1588{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001589 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1590 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 int ok = 1, pcmd;
1592
Philipp Reisner89e58e72011-01-19 13:12:45 +01001593 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001594 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001595 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1596 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001597 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001598 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001599 ok &= drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001600 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001601 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001602 } else {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001603 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001604 /* we expect it to be marked out of sync anyways...
1605 * maybe assert this? */
1606 }
1607 dec_unacked(mdev);
1608 }
1609 /* we delete from the conflict detection hash _after_ we sent out the
1610 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001611 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001612 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001613 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1614 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001615 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001616 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001617 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001619 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001620
1621 return ok;
1622}
1623
1624static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1625{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001626 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627 int ok = 1;
1628
Philipp Reisner89e58e72011-01-19 13:12:45 +01001629 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001630 ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001631
Philipp Reisner87eeee42011-01-19 14:16:30 +01001632 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001633 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1634 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001635 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636
1637 dec_unacked(mdev);
1638
1639 return ok;
1640}
1641
/*
 * seq_greater() - wrap-around aware "a is newer than b" for sequence numbers.
 *
 * Returns true if @a is ahead of @b by less than half of the 32-bit
 * sequence space, false otherwise (including a == b).
 */
static bool seq_greater(u32 a, u32 b)
{
	/*
	 * We assume 32-bit wrap-around here.
	 * For 24-bit wrap-around, we would have to shift:
	 *  a <<= 8; b <<= 8;
	 *
	 * Subtract in unsigned arithmetic, where wrap-around is well
	 * defined, and only then look at the sign of the difference.
	 * Subtracting two values already cast to s32 can overflow,
	 * e.g. for a = 0x80000000 and b = 1.
	 */
	return (s32)(a - b) > 0;
}
1651
1652static u32 seq_max(u32 a, u32 b)
1653{
1654 return seq_greater(a, b) ? a : b;
1655}
1656
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001657static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001658{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001659 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001660
1661 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001662 old_peer_seq = mdev->peer_seq;
1663 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001664 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001665 if (old_peer_seq != peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001666 wake_up(&mdev->seq_wait);
1667}
1668
Philipp Reisnerb411b362009-09-25 16:07:19 -07001669/* Called from receive_Data.
1670 * Synchronize packets on sock with packets on msock.
1671 *
1672 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1673 * packet traveling on msock, they are still processed in the order they have
1674 * been sent.
1675 *
1676 * Note: we don't care for Ack packets overtaking P_DATA packets.
1677 *
1678 * In case packet_seq is larger than mdev->peer_seq number, there are
1679 * outstanding packets on the msock. We wait for them to arrive.
1680 * In case we are the logically next packet, we update mdev->peer_seq
1681 * ourselves. Correctly handles 32bit wrap around.
1682 *
1683 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1684 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1685 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1686 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1687 *
1688 * returns 0 if we may process the packet,
1689 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
1690static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
1691{
1692 DEFINE_WAIT(wait);
1693 unsigned int p_seq;
1694 long timeout;
1695 int ret = 0;
1696 spin_lock(&mdev->peer_seq_lock);
1697 for (;;) {
1698 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001699 if (!seq_greater(packet_seq, mdev->peer_seq + 1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001700 break;
1701 if (signal_pending(current)) {
1702 ret = -ERESTARTSYS;
1703 break;
1704 }
1705 p_seq = mdev->peer_seq;
1706 spin_unlock(&mdev->peer_seq_lock);
1707 timeout = schedule_timeout(30*HZ);
1708 spin_lock(&mdev->peer_seq_lock);
1709 if (timeout == 0 && p_seq == mdev->peer_seq) {
1710 ret = -ETIMEDOUT;
1711 dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
1712 break;
1713 }
1714 }
1715 finish_wait(&mdev->seq_wait, &wait);
1716 if (mdev->peer_seq+1 == packet_seq)
1717 mdev->peer_seq++;
1718 spin_unlock(&mdev->peer_seq_lock);
1719 return ret;
1720}
1721
Lars Ellenberg688593c2010-11-17 22:25:03 +01001722/* see also bio_flags_to_wire()
1723 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1724 * flags and back. We may replicate to other kernel versions. */
1725static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001726{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001727 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1728 (dpf & DP_FUA ? REQ_FUA : 0) |
1729 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1730 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001731}
1732
Philipp Reisnerb411b362009-09-25 16:07:19 -07001733/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001734static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1735 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001736{
1737 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001738 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001739 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001740 int rw = WRITE;
1741 u32 dp_flags;
1742
Philipp Reisnerb411b362009-09-25 16:07:19 -07001743 if (!get_ldev(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001744 spin_lock(&mdev->peer_seq_lock);
1745 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
1746 mdev->peer_seq++;
1747 spin_unlock(&mdev->peer_seq_lock);
1748
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001749 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001750 atomic_inc(&mdev->current_epoch->epoch_size);
1751 return drbd_drain_block(mdev, data_size);
1752 }
1753
1754 /* get_ldev(mdev) successful.
1755 * Corresponding put_ldev done either below (on various errors),
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001756 * or in drbd_endio_sec, if we successfully submit the data at
Philipp Reisnerb411b362009-09-25 16:07:19 -07001757 * the end of this function. */
1758
1759 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001760 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1761 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001762 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001763 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001764 }
1765
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001766 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001767
Lars Ellenberg688593c2010-11-17 22:25:03 +01001768 dp_flags = be32_to_cpu(p->dp_flags);
1769 rw |= wire_flags_to_bio(mdev, dp_flags);
1770
1771 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001772 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01001773
Philipp Reisnerb411b362009-09-25 16:07:19 -07001774 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001775 peer_req->epoch = mdev->current_epoch;
1776 atomic_inc(&peer_req->epoch->epoch_size);
1777 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778 spin_unlock(&mdev->epoch_lock);
1779
Philipp Reisnerb411b362009-09-25 16:07:19 -07001780 /* I'm the receiver, I do hold a net_cnt reference. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001781 if (!mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001782 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001783 } else {
1784 /* don't get the req_lock yet,
1785 * we may sleep in drbd_wait_peer_seq */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001786 const int size = peer_req->i.size;
Philipp Reisner25703f82011-02-07 14:35:25 +01001787 const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001789 int first;
1790
Philipp Reisner89e58e72011-01-19 13:12:45 +01001791 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001792
1793 /* conflict detection and handling:
1794 * 1. wait on the sequence number,
1795 * in case this data packet overtook ACK packets.
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001796 * 2. check for conflicting write requests.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001797 *
1798 * Note: for two_primaries, we are protocol C,
1799 * so there cannot be any request that is DONE
1800 * but still on the transfer log.
1801 *
Philipp Reisnerb411b362009-09-25 16:07:19 -07001802 * if no conflicting request is found:
1803 * submit.
1804 *
1805 * if any conflicting request is found
1806 * that has not yet been acked,
1807 * AND I have the "discard concurrent writes" flag:
1808 * queue (via done_ee) the P_DISCARD_ACK; OUT.
1809 *
1810 * if any conflicting request is found:
1811 * block the receiver, waiting on misc_wait
1812 * until no more conflicting requests are there,
1813 * or we get interrupted (disconnect).
1814 *
1815 * we do not just write after local io completion of those
1816 * requests, but only after req is done completely, i.e.
1817 * we wait for the P_DISCARD_ACK to arrive!
1818 *
1819 * then proceed normally, i.e. submit.
1820 */
1821 if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
1822 goto out_interrupted;
1823
Philipp Reisner87eeee42011-01-19 14:16:30 +01001824 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825
Philipp Reisnerb411b362009-09-25 16:07:19 -07001826 first = 1;
1827 for (;;) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001828 struct drbd_interval *i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001829 int have_unacked = 0;
1830 int have_conflict = 0;
1831 prepare_to_wait(&mdev->misc_wait, &wait,
1832 TASK_INTERRUPTIBLE);
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001833
1834 i = drbd_find_overlap(&mdev->write_requests, sector, size);
1835 if (i) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001836 /* only ALERT on first iteration,
1837 * we may be woken up early... */
1838 if (first)
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001839 dev_alert(DEV, "%s[%u] Concurrent %s write detected!"
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001840 " new: %llus +%u; pending: %llus +%u\n",
1841 current->comm, current->pid,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001842 i->local ? "local" : "remote",
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001843 (unsigned long long)sector, size,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001844 (unsigned long long)i->sector, i->size);
1845
1846 if (i->local) {
1847 struct drbd_request *req2;
1848
1849 req2 = container_of(i, struct drbd_request, i);
1850 if (req2->rq_state & RQ_NET_PENDING)
1851 ++have_unacked;
1852 }
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001853 ++have_conflict;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001854 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855 if (!have_conflict)
1856 break;
1857
1858 /* Discard Ack only for the _first_ iteration */
1859 if (first && discard && have_unacked) {
1860 dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
1861 (unsigned long long)sector);
1862 inc_unacked(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001863 peer_req->w.cb = e_send_discard_ack;
1864 list_add_tail(&peer_req->w.list, &mdev->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001865
Philipp Reisner87eeee42011-01-19 14:16:30 +01001866 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001867
1868 /* we could probably send that P_DISCARD_ACK ourselves,
1869 * but I don't like the receiver using the msock */
1870
1871 put_ldev(mdev);
Philipp Reisner0625ac12011-02-07 14:49:19 +01001872 wake_asender(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001873 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001874 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001875 }
1876
1877 if (signal_pending(current)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001878 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001879 finish_wait(&mdev->misc_wait, &wait);
1880 goto out_interrupted;
1881 }
1882
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001883 /* Indicate to wake up mdev->misc_wait upon completion. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001884 i->waiting = true;
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001885
Philipp Reisner87eeee42011-01-19 14:16:30 +01001886 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001887 if (first) {
1888 first = 0;
1889 dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
1890 "sec=%llus\n", (unsigned long long)sector);
1891 } else if (discard) {
1892 /* we had none on the first iteration.
1893 * there must be none now. */
1894 D_ASSERT(have_unacked == 0);
1895 }
1896 schedule();
Philipp Reisner87eeee42011-01-19 14:16:30 +01001897 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001898 }
1899 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001900
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001901 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001902 }
1903
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001904 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001905 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001906
Philipp Reisner89e58e72011-01-19 13:12:45 +01001907 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001908 case DRBD_PROT_C:
1909 inc_unacked(mdev);
1910 /* corresponding dec_unacked() in e_end_block()
1911 * respective _drbd_clear_done_ee */
1912 break;
1913 case DRBD_PROT_B:
1914 /* I really don't like it that the receiver thread
1915 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001916 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001917 break;
1918 case DRBD_PROT_A:
1919 /* nothing to do */
1920 break;
1921 }
1922
Lars Ellenberg6719fb02010-10-18 23:04:07 +02001923 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001924 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001925 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
1926 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
1927 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
1928 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001929 }
1930
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001931 if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001932 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001933
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001934 /* don't care for the reason here */
1935 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001936 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001937 list_del(&peer_req->w.list);
1938 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001939 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001940 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
1941 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001942
Philipp Reisnerb411b362009-09-25 16:07:19 -07001943out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001944 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001945 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001946 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001947 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001948}
1949
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001950/* We may throttle resync, if the lower device seems to be busy,
1951 * and current sync rate is above c_min_rate.
1952 *
1953 * To decide whether or not the lower device is busy, we use a scheme similar
1954 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
1955 * (more than 64 sectors) of activity we cannot account for with our own resync
1956 * activity, it obviously is "busy".
1957 *
1958 * The current sync rate used here uses only the most recent two step marks,
1959 * to have a short time average so we can react faster.
1960 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01001961int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001962{
1963 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1964 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01001965 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001966 int curr_events;
1967 int throttle = 0;
1968
1969 /* feature disabled? */
1970 if (mdev->sync_conf.c_min_rate == 0)
1971 return 0;
1972
Philipp Reisnere3555d82010-11-07 15:56:29 +01001973 spin_lock_irq(&mdev->al_lock);
1974 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
1975 if (tmp) {
1976 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
1977 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
1978 spin_unlock_irq(&mdev->al_lock);
1979 return 0;
1980 }
1981 /* Do not slow down if app IO is already waiting for this extent */
1982 }
1983 spin_unlock_irq(&mdev->al_lock);
1984
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001985 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
1986 (int)part_stat_read(&disk->part0, sectors[1]) -
1987 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01001988
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001989 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
1990 unsigned long rs_left;
1991 int i;
1992
1993 mdev->rs_last_events = curr_events;
1994
1995 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
1996 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01001997 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
1998
1999 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2000 rs_left = mdev->ov_left;
2001 else
2002 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002003
2004 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2005 if (!dt)
2006 dt++;
2007 db = mdev->rs_mark_left[i] - rs_left;
2008 dbdt = Bit2KB(db/dt);
2009
2010 if (dbdt > mdev->sync_conf.c_min_rate)
2011 throttle = 1;
2012 }
2013 return throttle;
2014}
2015
2016
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002017static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2018 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019{
2020 sector_t sector;
2021 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002022 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002023 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002024 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002025 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002026 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002027
2028 sector = be64_to_cpu(p->sector);
2029 size = be32_to_cpu(p->blksize);
2030
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002031 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002032 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2033 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002034 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002035 }
2036 if (sector + (size>>9) > capacity) {
2037 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2038 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002039 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002040 }
2041
2042 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002043 verb = 1;
2044 switch (cmd) {
2045 case P_DATA_REQUEST:
2046 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2047 break;
2048 case P_RS_DATA_REQUEST:
2049 case P_CSUM_RS_REQUEST:
2050 case P_OV_REQUEST:
2051 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2052 break;
2053 case P_OV_REPLY:
2054 verb = 0;
2055 dec_rs_pending(mdev);
2056 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2057 break;
2058 default:
2059 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2060 cmdname(cmd));
2061 }
2062 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002063 dev_err(DEV, "Can not satisfy peer's read request, "
2064 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002065
Lars Ellenberga821cc42010-09-06 12:31:37 +02002066 /* drain possibly payload */
2067 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002068 }
2069
2070 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2071 * "criss-cross" setup, that might cause write-out on some other DRBD,
2072 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002073 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2074 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002075 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002076 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002077 }
2078
Philipp Reisner02918be2010-08-20 14:35:10 +02002079 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002080 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002081 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002082 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002083 /* application IO, don't drbd_rs_begin_io */
2084 goto submit;
2085
Philipp Reisnerb411b362009-09-25 16:07:19 -07002086 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002087 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002088 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002089 /* used in the sector offset progress display */
2090 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002091 break;
2092
2093 case P_OV_REPLY:
2094 case P_CSUM_RS_REQUEST:
2095 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002096 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2097 if (!di)
2098 goto out_free_e;
2099
2100 di->digest_size = digest_size;
2101 di->digest = (((char *)di)+sizeof(struct digest_info));
2102
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002103 peer_req->digest = di;
2104 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002105
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002106 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002107 goto out_free_e;
2108
Philipp Reisner02918be2010-08-20 14:35:10 +02002109 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002110 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002111 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002112 /* used in the sector offset progress display */
2113 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002114 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002115 /* track progress, we may need to throttle */
2116 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002117 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002118 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002119 /* drbd_rs_begin_io done when we sent this request,
2120 * but accounting still needs to be done. */
2121 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122 }
2123 break;
2124
2125 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002126 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002127 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002128 unsigned long now = jiffies;
2129 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002130 mdev->ov_start_sector = sector;
2131 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002132 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2133 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002134 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2135 mdev->rs_mark_left[i] = mdev->ov_left;
2136 mdev->rs_mark_time[i] = now;
2137 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002138 dev_info(DEV, "Online Verify start sector: %llu\n",
2139 (unsigned long long)sector);
2140 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002141 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002142 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002143 break;
2144
Philipp Reisnerb411b362009-09-25 16:07:19 -07002145 default:
2146 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002147 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002148 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002149 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002150 }
2151
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002152 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2153 * wrt the receiver, but it is not as straightforward as it may seem.
2154 * Various places in the resync start and stop logic assume resync
2155 * requests are processed in order, requeuing this on the worker thread
2156 * introduces a bunch of new code for synchronization between threads.
2157 *
2158 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2159 * "forever", throttling after drbd_rs_begin_io will lock that extent
2160 * for application writes for the same time. For now, just throttle
2161 * here, where the rest of the code expects the receiver to sleep for
2162 * a while, anyways.
2163 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002164
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002165 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2166 * this defers syncer requests for some time, before letting at least
2167 * on request through. The resync controller on the receiving side
2168 * will adapt to the incoming rate accordingly.
2169 *
2170 * We cannot throttle here if remote is Primary/SyncTarget:
2171 * we would also throttle its application reads.
2172 * In that case, throttling is done on the SyncTarget only.
2173 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002174 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2175 schedule_timeout_uninterruptible(HZ/10);
2176 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002177 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002178
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002179submit_for_resync:
2180 atomic_add(size >> 9, &mdev->rs_sect_ev);
2181
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002182submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002183 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002184 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002185 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002186 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002187
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002188 if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002189 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002190
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002191 /* don't care for the reason here */
2192 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002193 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002194 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002195 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002196 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2197
Philipp Reisnerb411b362009-09-25 16:07:19 -07002198out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002199 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002200 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002201 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002202}
2203
2204static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2205{
2206 int self, peer, rv = -100;
2207 unsigned long ch_self, ch_peer;
2208
2209 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2210 peer = mdev->p_uuid[UI_BITMAP] & 1;
2211
2212 ch_peer = mdev->p_uuid[UI_SIZE];
2213 ch_self = mdev->comm_bm_set;
2214
Philipp Reisner89e58e72011-01-19 13:12:45 +01002215 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002216 case ASB_CONSENSUS:
2217 case ASB_DISCARD_SECONDARY:
2218 case ASB_CALL_HELPER:
2219 dev_err(DEV, "Configuration error.\n");
2220 break;
2221 case ASB_DISCONNECT:
2222 break;
2223 case ASB_DISCARD_YOUNGER_PRI:
2224 if (self == 0 && peer == 1) {
2225 rv = -1;
2226 break;
2227 }
2228 if (self == 1 && peer == 0) {
2229 rv = 1;
2230 break;
2231 }
2232 /* Else fall through to one of the other strategies... */
2233 case ASB_DISCARD_OLDER_PRI:
2234 if (self == 0 && peer == 1) {
2235 rv = 1;
2236 break;
2237 }
2238 if (self == 1 && peer == 0) {
2239 rv = -1;
2240 break;
2241 }
2242 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002243 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002244 "Using discard-least-changes instead\n");
2245 case ASB_DISCARD_ZERO_CHG:
2246 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002247 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248 ? -1 : 1;
2249 break;
2250 } else {
2251 if (ch_peer == 0) { rv = 1; break; }
2252 if (ch_self == 0) { rv = -1; break; }
2253 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002254 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002255 break;
2256 case ASB_DISCARD_LEAST_CHG:
2257 if (ch_self < ch_peer)
2258 rv = -1;
2259 else if (ch_self > ch_peer)
2260 rv = 1;
2261 else /* ( ch_self == ch_peer ) */
2262 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002263 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002264 ? -1 : 1;
2265 break;
2266 case ASB_DISCARD_LOCAL:
2267 rv = -1;
2268 break;
2269 case ASB_DISCARD_REMOTE:
2270 rv = 1;
2271 }
2272
2273 return rv;
2274}
2275
2276static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2277{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002278 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002279
Philipp Reisner89e58e72011-01-19 13:12:45 +01002280 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002281 case ASB_DISCARD_YOUNGER_PRI:
2282 case ASB_DISCARD_OLDER_PRI:
2283 case ASB_DISCARD_LEAST_CHG:
2284 case ASB_DISCARD_LOCAL:
2285 case ASB_DISCARD_REMOTE:
2286 dev_err(DEV, "Configuration error.\n");
2287 break;
2288 case ASB_DISCONNECT:
2289 break;
2290 case ASB_CONSENSUS:
2291 hg = drbd_asb_recover_0p(mdev);
2292 if (hg == -1 && mdev->state.role == R_SECONDARY)
2293 rv = hg;
2294 if (hg == 1 && mdev->state.role == R_PRIMARY)
2295 rv = hg;
2296 break;
2297 case ASB_VIOLENTLY:
2298 rv = drbd_asb_recover_0p(mdev);
2299 break;
2300 case ASB_DISCARD_SECONDARY:
2301 return mdev->state.role == R_PRIMARY ? 1 : -1;
2302 case ASB_CALL_HELPER:
2303 hg = drbd_asb_recover_0p(mdev);
2304 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002305 enum drbd_state_rv rv2;
2306
2307 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002308 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2309 * we might be here in C_WF_REPORT_PARAMS which is transient.
2310 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002311 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2312 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002313 drbd_khelper(mdev, "pri-lost-after-sb");
2314 } else {
2315 dev_warn(DEV, "Successfully gave up primary role.\n");
2316 rv = hg;
2317 }
2318 } else
2319 rv = hg;
2320 }
2321
2322 return rv;
2323}
2324
2325static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2326{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002327 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002328
Philipp Reisner89e58e72011-01-19 13:12:45 +01002329 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002330 case ASB_DISCARD_YOUNGER_PRI:
2331 case ASB_DISCARD_OLDER_PRI:
2332 case ASB_DISCARD_LEAST_CHG:
2333 case ASB_DISCARD_LOCAL:
2334 case ASB_DISCARD_REMOTE:
2335 case ASB_CONSENSUS:
2336 case ASB_DISCARD_SECONDARY:
2337 dev_err(DEV, "Configuration error.\n");
2338 break;
2339 case ASB_VIOLENTLY:
2340 rv = drbd_asb_recover_0p(mdev);
2341 break;
2342 case ASB_DISCONNECT:
2343 break;
2344 case ASB_CALL_HELPER:
2345 hg = drbd_asb_recover_0p(mdev);
2346 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002347 enum drbd_state_rv rv2;
2348
Philipp Reisnerb411b362009-09-25 16:07:19 -07002349 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2350 * we might be here in C_WF_REPORT_PARAMS which is transient.
2351 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002352 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2353 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002354 drbd_khelper(mdev, "pri-lost-after-sb");
2355 } else {
2356 dev_warn(DEV, "Successfully gave up primary role.\n");
2357 rv = hg;
2358 }
2359 } else
2360 rv = hg;
2361 }
2362
2363 return rv;
2364}
2365
2366static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2367 u64 bits, u64 flags)
2368{
2369 if (!uuid) {
2370 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2371 return;
2372 }
2373 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2374 text,
2375 (unsigned long long)uuid[UI_CURRENT],
2376 (unsigned long long)uuid[UI_BITMAP],
2377 (unsigned long long)uuid[UI_HISTORY_START],
2378 (unsigned long long)uuid[UI_HISTORY_END],
2379 (unsigned long long)bits,
2380 (unsigned long long)flags);
2381}
2382
2383/*
2384 100 after split brain try auto recover
2385 2 C_SYNC_SOURCE set BitMap
2386 1 C_SYNC_SOURCE use BitMap
2387 0 no Sync
2388 -1 C_SYNC_TARGET use BitMap
2389 -2 C_SYNC_TARGET set BitMap
2390 -100 after split brain, disconnect
2391-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002392-1091 requires proto 91
2393-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002394 */
2395static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2396{
2397 u64 self, peer;
2398 int i, j;
2399
2400 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2401 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2402
2403 *rule_nr = 10;
2404 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2405 return 0;
2406
2407 *rule_nr = 20;
2408 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2409 peer != UUID_JUST_CREATED)
2410 return -2;
2411
2412 *rule_nr = 30;
2413 if (self != UUID_JUST_CREATED &&
2414 (peer == UUID_JUST_CREATED || peer == (u64)0))
2415 return 2;
2416
2417 if (self == peer) {
2418 int rct, dc; /* roles at crash time */
2419
2420 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2421
Philipp Reisner31890f42011-01-19 14:12:51 +01002422 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002423 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002424
2425 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2426 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2427 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2428 drbd_uuid_set_bm(mdev, 0UL);
2429
2430 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2431 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2432 *rule_nr = 34;
2433 } else {
2434 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2435 *rule_nr = 36;
2436 }
2437
2438 return 1;
2439 }
2440
2441 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2442
Philipp Reisner31890f42011-01-19 14:12:51 +01002443 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002444 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002445
2446 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2447 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2448 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2449
2450 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2451 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2452 mdev->p_uuid[UI_BITMAP] = 0UL;
2453
2454 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2455 *rule_nr = 35;
2456 } else {
2457 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2458 *rule_nr = 37;
2459 }
2460
2461 return -1;
2462 }
2463
2464 /* Common power [off|failure] */
2465 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2466 (mdev->p_uuid[UI_FLAGS] & 2);
2467 /* lowest bit is set when we were primary,
2468 * next bit (weight 2) is set when peer was primary */
2469 *rule_nr = 40;
2470
2471 switch (rct) {
2472 case 0: /* !self_pri && !peer_pri */ return 0;
2473 case 1: /* self_pri && !peer_pri */ return 1;
2474 case 2: /* !self_pri && peer_pri */ return -1;
2475 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002476 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002477 return dc ? -1 : 1;
2478 }
2479 }
2480
2481 *rule_nr = 50;
2482 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2483 if (self == peer)
2484 return -1;
2485
2486 *rule_nr = 51;
2487 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2488 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002489 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002490 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2491 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2492 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002493 /* The last P_SYNC_UUID did not get though. Undo the last start of
2494 resync as sync source modifications of the peer's UUIDs. */
2495
Philipp Reisner31890f42011-01-19 14:12:51 +01002496 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002497 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002498
2499 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2500 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002501
2502 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2503 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2504
Philipp Reisnerb411b362009-09-25 16:07:19 -07002505 return -1;
2506 }
2507 }
2508
2509 *rule_nr = 60;
2510 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2511 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2512 peer = mdev->p_uuid[i] & ~((u64)1);
2513 if (self == peer)
2514 return -2;
2515 }
2516
2517 *rule_nr = 70;
2518 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2519 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2520 if (self == peer)
2521 return 1;
2522
2523 *rule_nr = 71;
2524 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2525 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002526 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002527 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2528 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2529 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002530 /* The last P_SYNC_UUID did not get though. Undo the last start of
2531 resync as sync source modifications of our UUIDs. */
2532
Philipp Reisner31890f42011-01-19 14:12:51 +01002533 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002534 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002535
2536 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2537 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2538
Philipp Reisner4a23f262011-01-11 17:42:17 +01002539 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002540 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2541 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2542
2543 return 1;
2544 }
2545 }
2546
2547
2548 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002549 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002550 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2551 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2552 if (self == peer)
2553 return 2;
2554 }
2555
2556 *rule_nr = 90;
2557 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2558 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2559 if (self == peer && self != ((u64)0))
2560 return 100;
2561
2562 *rule_nr = 100;
2563 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2564 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2565 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2566 peer = mdev->p_uuid[j] & ~((u64)1);
2567 if (self == peer)
2568 return -100;
2569 }
2570 }
2571
2572 return -1000;
2573}
2574
2575/* drbd_sync_handshake() returns the new conn state on success, or
2576 CONN_MASK (-1) on failure.
2577 */
2578static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2579 enum drbd_disk_state peer_disk) __must_hold(local)
2580{
2581 int hg, rule_nr;
2582 enum drbd_conns rv = C_MASK;
2583 enum drbd_disk_state mydisk;
2584
2585 mydisk = mdev->state.disk;
2586 if (mydisk == D_NEGOTIATING)
2587 mydisk = mdev->new_state_tmp.disk;
2588
2589 dev_info(DEV, "drbd_sync_handshake:\n");
2590 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2591 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2592 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2593
2594 hg = drbd_uuid_compare(mdev, &rule_nr);
2595
2596 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2597
2598 if (hg == -1000) {
2599 dev_alert(DEV, "Unrelated data, aborting!\n");
2600 return C_MASK;
2601 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002602 if (hg < -1000) {
2603 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002604 return C_MASK;
2605 }
2606
2607 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2608 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2609 int f = (hg == -100) || abs(hg) == 2;
2610 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2611 if (f)
2612 hg = hg*2;
2613 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2614 hg > 0 ? "source" : "target");
2615 }
2616
Adam Gandelman3a11a482010-04-08 16:48:23 -07002617 if (abs(hg) == 100)
2618 drbd_khelper(mdev, "initial-split-brain");
2619
Philipp Reisner89e58e72011-01-19 13:12:45 +01002620 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002621 int pcount = (mdev->state.role == R_PRIMARY)
2622 + (peer_role == R_PRIMARY);
2623 int forced = (hg == -100);
2624
2625 switch (pcount) {
2626 case 0:
2627 hg = drbd_asb_recover_0p(mdev);
2628 break;
2629 case 1:
2630 hg = drbd_asb_recover_1p(mdev);
2631 break;
2632 case 2:
2633 hg = drbd_asb_recover_2p(mdev);
2634 break;
2635 }
2636 if (abs(hg) < 100) {
2637 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2638 "automatically solved. Sync from %s node\n",
2639 pcount, (hg < 0) ? "peer" : "this");
2640 if (forced) {
2641 dev_warn(DEV, "Doing a full sync, since"
2642 " UUIDs where ambiguous.\n");
2643 hg = hg*2;
2644 }
2645 }
2646 }
2647
2648 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002649 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002650 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002651 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002652 hg = 1;
2653
2654 if (abs(hg) < 100)
2655 dev_warn(DEV, "Split-Brain detected, manually solved. "
2656 "Sync from %s node\n",
2657 (hg < 0) ? "peer" : "this");
2658 }
2659
2660 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002661 /* FIXME this log message is not correct if we end up here
2662 * after an attempted attach on a diskless node.
2663 * We just refuse to attach -- well, we drop the "connection"
2664 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002665 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002666 drbd_khelper(mdev, "split-brain");
2667 return C_MASK;
2668 }
2669
2670 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2671 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2672 return C_MASK;
2673 }
2674
2675 if (hg < 0 && /* by intention we do not use mydisk here. */
2676 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002677 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002678 case ASB_CALL_HELPER:
2679 drbd_khelper(mdev, "pri-lost");
2680 /* fall through */
2681 case ASB_DISCONNECT:
2682 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2683 return C_MASK;
2684 case ASB_VIOLENTLY:
2685 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2686 "assumption\n");
2687 }
2688 }
2689
Philipp Reisner89e58e72011-01-19 13:12:45 +01002690 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002691 if (hg == 0)
2692 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2693 else
2694 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2695 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2696 abs(hg) >= 2 ? "full" : "bit-map based");
2697 return C_MASK;
2698 }
2699
Philipp Reisnerb411b362009-09-25 16:07:19 -07002700 if (abs(hg) >= 2) {
2701 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002702 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2703 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002704 return C_MASK;
2705 }
2706
2707 if (hg > 0) { /* become sync source. */
2708 rv = C_WF_BITMAP_S;
2709 } else if (hg < 0) { /* become sync target */
2710 rv = C_WF_BITMAP_T;
2711 } else {
2712 rv = C_CONNECTED;
2713 if (drbd_bm_total_weight(mdev)) {
2714 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2715 drbd_bm_total_weight(mdev));
2716 }
2717 }
2718
2719 return rv;
2720}
2721
2722/* returns 1 if invalid */
2723static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2724{
2725 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2726 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2727 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2728 return 0;
2729
2730 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2731 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2732 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2733 return 1;
2734
2735 /* everything else is valid if they are equal on both sides. */
2736 if (peer == self)
2737 return 0;
2738
2739 /* everything es is invalid. */
2740 return 1;
2741}
2742
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002743static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
2744 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002745{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002746 struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002747 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002748 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002749 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2750
Philipp Reisnerb411b362009-09-25 16:07:19 -07002751 p_proto = be32_to_cpu(p->protocol);
2752 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2753 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2754 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002755 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002756 cf = be32_to_cpu(p->conn_flags);
2757 p_want_lose = cf & CF_WANT_LOSE;
2758
2759 clear_bit(CONN_DRY_RUN, &mdev->flags);
2760
2761 if (cf & CF_DRY_RUN)
2762 set_bit(CONN_DRY_RUN, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002763
Philipp Reisner89e58e72011-01-19 13:12:45 +01002764 if (p_proto != mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002765 dev_err(DEV, "incompatible communication protocols\n");
2766 goto disconnect;
2767 }
2768
Philipp Reisner89e58e72011-01-19 13:12:45 +01002769 if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002770 dev_err(DEV, "incompatible after-sb-0pri settings\n");
2771 goto disconnect;
2772 }
2773
Philipp Reisner89e58e72011-01-19 13:12:45 +01002774 if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002775 dev_err(DEV, "incompatible after-sb-1pri settings\n");
2776 goto disconnect;
2777 }
2778
Philipp Reisner89e58e72011-01-19 13:12:45 +01002779 if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002780 dev_err(DEV, "incompatible after-sb-2pri settings\n");
2781 goto disconnect;
2782 }
2783
Philipp Reisner89e58e72011-01-19 13:12:45 +01002784 if (p_want_lose && mdev->tconn->net_conf->want_lose) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002785 dev_err(DEV, "both sides have the 'want_lose' flag set\n");
2786 goto disconnect;
2787 }
2788
Philipp Reisner89e58e72011-01-19 13:12:45 +01002789 if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002790 dev_err(DEV, "incompatible setting of the two-primaries options\n");
2791 goto disconnect;
2792 }
2793
Philipp Reisner31890f42011-01-19 14:12:51 +01002794 if (mdev->tconn->agreed_pro_version >= 87) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002795 unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002796
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002797 if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002798 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002799
2800 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2801 if (strcmp(p_integrity_alg, my_alg)) {
2802 dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
2803 goto disconnect;
2804 }
2805 dev_info(DEV, "data-integrity-alg: %s\n",
2806 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2807 }
2808
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002809 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002810
2811disconnect:
2812 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002813 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002814}
2815
2816/* helper function
2817 * input: alg name, feature name
2818 * return: NULL (alg name was "")
2819 * ERR_PTR(error) if something goes wrong
2820 * or the crypto hash ptr, if it worked out ok. */
2821struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2822 const char *alg, const char *name)
2823{
2824 struct crypto_hash *tfm;
2825
2826 if (!alg[0])
2827 return NULL;
2828
2829 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2830 if (IS_ERR(tfm)) {
2831 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2832 alg, name, PTR_ERR(tfm));
2833 return tfm;
2834 }
2835 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2836 crypto_free_hash(tfm);
2837 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2838 return ERR_PTR(-EINVAL);
2839 }
2840 return tfm;
2841}
2842
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002843static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2844 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002845{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002846 int ok = true;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002847 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002848 unsigned int header_size, data_size, exp_max_sz;
2849 struct crypto_hash *verify_tfm = NULL;
2850 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002851 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002852 int *rs_plan_s = NULL;
2853 int fifo_size = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002854
2855 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2856 : apv == 88 ? sizeof(struct p_rs_param)
2857 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002858 : apv <= 94 ? sizeof(struct p_rs_param_89)
2859 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002860
Philipp Reisner02918be2010-08-20 14:35:10 +02002861 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002862 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002863 packet_size, exp_max_sz);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002864 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865 }
2866
2867 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002868 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002869 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002870 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002871 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002872 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002873 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002874 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002875 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002876 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002877 D_ASSERT(data_size == 0);
2878 }
2879
2880 /* initialize verify_alg and csums_alg */
2881 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2882
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002883 if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002884 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002885
2886 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2887
2888 if (apv >= 88) {
2889 if (apv == 88) {
2890 if (data_size > SHARED_SECRET_MAX) {
2891 dev_err(DEV, "verify-alg too long, "
2892 "peer wants %u, accepting only %u byte\n",
2893 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002894 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002895 }
2896
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002897 if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002898 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899
2900 /* we expect NUL terminated string */
2901 /* but just in case someone tries to be evil */
2902 D_ASSERT(p->verify_alg[data_size-1] == 0);
2903 p->verify_alg[data_size-1] = 0;
2904
2905 } else /* apv >= 89 */ {
2906 /* we still expect NUL terminated strings */
2907 /* but just in case someone tries to be evil */
2908 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
2909 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
2910 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
2911 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
2912 }
2913
2914 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
2915 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2916 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2917 mdev->sync_conf.verify_alg, p->verify_alg);
2918 goto disconnect;
2919 }
2920 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
2921 p->verify_alg, "verify-alg");
2922 if (IS_ERR(verify_tfm)) {
2923 verify_tfm = NULL;
2924 goto disconnect;
2925 }
2926 }
2927
2928 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
2929 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2930 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2931 mdev->sync_conf.csums_alg, p->csums_alg);
2932 goto disconnect;
2933 }
2934 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
2935 p->csums_alg, "csums-alg");
2936 if (IS_ERR(csums_tfm)) {
2937 csums_tfm = NULL;
2938 goto disconnect;
2939 }
2940 }
2941
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002942 if (apv > 94) {
2943 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2944 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
2945 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
2946 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
2947 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02002948
2949 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2950 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
2951 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
2952 if (!rs_plan_s) {
2953 dev_err(DEV, "kmalloc of fifo_buffer failed");
2954 goto disconnect;
2955 }
2956 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002957 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002958
2959 spin_lock(&mdev->peer_seq_lock);
2960 /* lock against drbd_nl_syncer_conf() */
2961 if (verify_tfm) {
2962 strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
2963 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
2964 crypto_free_hash(mdev->verify_tfm);
2965 mdev->verify_tfm = verify_tfm;
2966 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
2967 }
2968 if (csums_tfm) {
2969 strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
2970 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
2971 crypto_free_hash(mdev->csums_tfm);
2972 mdev->csums_tfm = csums_tfm;
2973 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
2974 }
Philipp Reisner778f2712010-07-06 11:14:00 +02002975 if (fifo_size != mdev->rs_plan_s.size) {
2976 kfree(mdev->rs_plan_s.values);
2977 mdev->rs_plan_s.values = rs_plan_s;
2978 mdev->rs_plan_s.size = fifo_size;
2979 mdev->rs_planed = 0;
2980 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002981 spin_unlock(&mdev->peer_seq_lock);
2982 }
2983
2984 return ok;
2985disconnect:
2986 /* just for completeness: actually not needed,
2987 * as this is not reached if csums_tfm was ok. */
2988 crypto_free_hash(csums_tfm);
2989 /* but free the verify_tfm again, if csums_tfm did not work out */
2990 crypto_free_hash(verify_tfm);
2991 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002992 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002993}
2994
Philipp Reisnerb411b362009-09-25 16:07:19 -07002995/* warn if the arguments differ by more than 12.5% */
2996static void warn_if_differ_considerably(struct drbd_conf *mdev,
2997 const char *s, sector_t a, sector_t b)
2998{
2999 sector_t d;
3000 if (a == 0 || b == 0)
3001 return;
3002 d = (a > b) ? (a - b) : (b - a);
3003 if (d > (a>>3) || d > (b>>3))
3004 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3005 (unsigned long long)a, (unsigned long long)b);
3006}
3007
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003008static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3009 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003010{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003011 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003012 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003013 sector_t p_size, p_usize, my_usize;
3014 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003015 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003016
Philipp Reisnerb411b362009-09-25 16:07:19 -07003017 p_size = be64_to_cpu(p->d_size);
3018 p_usize = be64_to_cpu(p->u_size);
3019
3020 if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
3021 dev_err(DEV, "some backing storage is needed\n");
3022 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003023 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003024 }
3025
3026 /* just store the peer's disk size for now.
3027 * we still need to figure out whether we accept that. */
3028 mdev->p_size = p_size;
3029
Philipp Reisnerb411b362009-09-25 16:07:19 -07003030 if (get_ldev(mdev)) {
3031 warn_if_differ_considerably(mdev, "lower level device sizes",
3032 p_size, drbd_get_max_capacity(mdev->ldev));
3033 warn_if_differ_considerably(mdev, "user requested size",
3034 p_usize, mdev->ldev->dc.disk_size);
3035
3036 /* if this is the first connect, or an otherwise expected
3037 * param exchange, choose the minimum */
3038 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3039 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3040 p_usize);
3041
3042 my_usize = mdev->ldev->dc.disk_size;
3043
3044 if (mdev->ldev->dc.disk_size != p_usize) {
3045 mdev->ldev->dc.disk_size = p_usize;
3046 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3047 (unsigned long)mdev->ldev->dc.disk_size);
3048 }
3049
3050 /* Never shrink a device with usable data during connect.
3051 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003052 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003053 drbd_get_capacity(mdev->this_bdev) &&
3054 mdev->state.disk >= D_OUTDATED &&
3055 mdev->state.conn < C_CONNECTED) {
3056 dev_err(DEV, "The peer's disk size is too small!\n");
3057 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3058 mdev->ldev->dc.disk_size = my_usize;
3059 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003060 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003061 }
3062 put_ldev(mdev);
3063 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003064
Philipp Reisnere89b5912010-03-24 17:11:33 +01003065 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003066 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003067 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003068 put_ldev(mdev);
3069 if (dd == dev_size_error)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003070 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003071 drbd_md_sync(mdev);
3072 } else {
3073 /* I am diskless, need to accept the peer's size. */
3074 drbd_set_my_capacity(mdev, p_size);
3075 }
3076
Philipp Reisner99432fc2011-05-20 16:39:13 +02003077 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3078 drbd_reconsider_max_bio_size(mdev);
3079
Philipp Reisnerb411b362009-09-25 16:07:19 -07003080 if (get_ldev(mdev)) {
3081 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3082 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3083 ldsc = 1;
3084 }
3085
Philipp Reisnerb411b362009-09-25 16:07:19 -07003086 put_ldev(mdev);
3087 }
3088
3089 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3090 if (be64_to_cpu(p->c_size) !=
3091 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3092 /* we have different sizes, probably peer
3093 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003094 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003095 }
3096 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3097 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3098 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003099 mdev->state.disk >= D_INCONSISTENT) {
3100 if (ddsf & DDSF_NO_RESYNC)
3101 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3102 else
3103 resync_after_online_grow(mdev);
3104 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003105 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3106 }
3107 }
3108
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003109 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110}
3111
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003112static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3113 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003114{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003115 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003116 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003117 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003118
Philipp Reisnerb411b362009-09-25 16:07:19 -07003119 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3120
3121 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3122 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3123
3124 kfree(mdev->p_uuid);
3125 mdev->p_uuid = p_uuid;
3126
3127 if (mdev->state.conn < C_CONNECTED &&
3128 mdev->state.disk < D_INCONSISTENT &&
3129 mdev->state.role == R_PRIMARY &&
3130 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3131 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3132 (unsigned long long)mdev->ed_uuid);
3133 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003134 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003135 }
3136
3137 if (get_ldev(mdev)) {
3138 int skip_initial_sync =
3139 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003140 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003141 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3142 (p_uuid[UI_FLAGS] & 8);
3143 if (skip_initial_sync) {
3144 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3145 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003146 "clear_n_write from receive_uuids",
3147 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003148 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3149 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3150 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3151 CS_VERBOSE, NULL);
3152 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003153 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003154 }
3155 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003156 } else if (mdev->state.disk < D_INCONSISTENT &&
3157 mdev->state.role == R_PRIMARY) {
3158 /* I am a diskless primary, the peer just created a new current UUID
3159 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003160 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003161 }
3162
3163 /* Before we test for the disk state, we should wait until an eventually
3164 ongoing cluster wide state change is finished. That is important if
3165 we are primary and are detaching from our disk. We need to see the
3166 new disk state... */
3167 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
3168 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003169 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3170
3171 if (updated_uuids)
3172 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003174 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003175}
3176
3177/**
3178 * convert_state() - Converts the peer's view of the cluster state to our point of view
3179 * @ps: The state as seen by the peer.
3180 */
3181static union drbd_state convert_state(union drbd_state ps)
3182{
3183 union drbd_state ms;
3184
3185 static enum drbd_conns c_tab[] = {
3186 [C_CONNECTED] = C_CONNECTED,
3187
3188 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3189 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3190 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3191 [C_VERIFY_S] = C_VERIFY_T,
3192 [C_MASK] = C_MASK,
3193 };
3194
3195 ms.i = ps.i;
3196
3197 ms.conn = c_tab[ps.conn];
3198 ms.peer = ps.role;
3199 ms.role = ps.peer;
3200 ms.pdsk = ps.disk;
3201 ms.disk = ps.pdsk;
3202 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3203
3204 return ms;
3205}
3206
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003207static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3208 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003209{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003210 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003211 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003212 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003213
Philipp Reisnerb411b362009-09-25 16:07:19 -07003214 mask.i = be32_to_cpu(p->mask);
3215 val.i = be32_to_cpu(p->val);
3216
Philipp Reisner25703f82011-02-07 14:35:25 +01003217 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003218 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
3219 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003220 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003221 }
3222
3223 mask = convert_state(mask);
3224 val = convert_state(val);
3225
3226 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3227
3228 drbd_send_sr_reply(mdev, rv);
3229 drbd_md_sync(mdev);
3230
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003231 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232}
3233
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003234static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3235 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003236{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003237 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003238 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003239 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003240 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003241 int rv;
3242
Philipp Reisnerb411b362009-09-25 16:07:19 -07003243 peer_state.i = be32_to_cpu(p->state);
3244
3245 real_peer_disk = peer_state.disk;
3246 if (peer_state.disk == D_NEGOTIATING) {
3247 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3248 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3249 }
3250
Philipp Reisner87eeee42011-01-19 14:16:30 +01003251 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003252 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003253 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003254 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003255
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003256 /* peer says his disk is uptodate, while we think it is inconsistent,
3257 * and this happens while we think we have a sync going on. */
3258 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3259 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3260 /* If we are (becoming) SyncSource, but peer is still in sync
3261 * preparation, ignore its uptodate-ness to avoid flapping, it
3262 * will change to inconsistent once the peer reaches active
3263 * syncing states.
3264 * It may have changed syncer-paused flags, however, so we
3265 * cannot ignore this completely. */
3266 if (peer_state.conn > C_CONNECTED &&
3267 peer_state.conn < C_SYNC_SOURCE)
3268 real_peer_disk = D_INCONSISTENT;
3269
3270 /* if peer_state changes to connected at the same time,
3271 * it explicitly notifies us that it finished resync.
3272 * Maybe we should finish it up, too? */
3273 else if (os.conn >= C_SYNC_SOURCE &&
3274 peer_state.conn == C_CONNECTED) {
3275 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3276 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003277 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003278 }
3279 }
3280
3281 /* peer says his disk is inconsistent, while we think it is uptodate,
3282 * and this happens while the peer still thinks we have a sync going on,
3283 * but we think we are already done with the sync.
3284 * We ignore this to avoid flapping pdsk.
3285 * This should not happen, if the peer is a recent version of drbd. */
3286 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3287 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3288 real_peer_disk = D_UP_TO_DATE;
3289
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003290 if (ns.conn == C_WF_REPORT_PARAMS)
3291 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003292
Philipp Reisner67531712010-10-27 12:21:30 +02003293 if (peer_state.conn == C_AHEAD)
3294 ns.conn = C_BEHIND;
3295
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3297 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3298 int cr; /* consider resync */
3299
3300 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003301 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003302 /* if we had an established connection
3303 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003304 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003305 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003306 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003307 /* if we have both been inconsistent, and the peer has been
3308 * forced to be UpToDate with --overwrite-data */
3309 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3310 /* if we had been plain connected, and the admin requested to
3311 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003312 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003313 (peer_state.conn >= C_STARTING_SYNC_S &&
3314 peer_state.conn <= C_WF_BITMAP_T));
3315
3316 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003317 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003318
3319 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003320 if (ns.conn == C_MASK) {
3321 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003322 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003323 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003324 } else if (peer_state.disk == D_NEGOTIATING) {
3325 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3326 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003327 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003328 } else {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003329 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003330 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003331 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003332 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003333 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003334 }
3335 }
3336 }
3337
Philipp Reisner87eeee42011-01-19 14:16:30 +01003338 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003339 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003340 goto retry;
3341 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342 ns.peer = peer_state.role;
3343 ns.pdsk = real_peer_disk;
3344 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003345 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003346 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003347 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3348 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003349 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003350 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003351 for temporal network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003352 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003353 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
3354 tl_clear(mdev);
3355 drbd_uuid_new_current(mdev);
3356 clear_bit(NEW_CUR_UUID, &mdev->flags);
3357 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003358 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003359 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003360 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003361 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003362 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003363
3364 if (rv < SS_SUCCESS) {
3365 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003366 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003367 }
3368
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003369 if (os.conn > C_WF_REPORT_PARAMS) {
3370 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003371 peer_state.disk != D_NEGOTIATING ) {
3372 /* we want resync, peer has not yet decided to sync... */
3373 /* Nowadays only used when forcing a node into primary role and
3374 setting its disk to UpToDate with that */
3375 drbd_send_uuids(mdev);
3376 drbd_send_state(mdev);
3377 }
3378 }
3379
Philipp Reisner89e58e72011-01-19 13:12:45 +01003380 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003381
3382 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3383
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003384 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003385}
3386
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003387static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3388 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003389{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003390 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003391
3392 wait_event(mdev->misc_wait,
3393 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003394 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003395 mdev->state.conn < C_CONNECTED ||
3396 mdev->state.disk < D_NEGOTIATING);
3397
3398 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3399
Philipp Reisnerb411b362009-09-25 16:07:19 -07003400 /* Here the _drbd_uuid_ functions are right, current should
3401 _not_ be rotated into the history */
3402 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3403 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3404 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3405
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003406 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003407 drbd_start_resync(mdev, C_SYNC_TARGET);
3408
3409 put_ldev(mdev);
3410 } else
3411 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3412
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003413 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003414}
3415
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003416/**
3417 * receive_bitmap_plain
3418 *
3419 * Return 0 when done, 1 when another iteration is needed, and a negative error
3420 * code upon failure.
3421 */
3422static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003423receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3424 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003425{
3426 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3427 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003428 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003429
Philipp Reisner02918be2010-08-20 14:35:10 +02003430 if (want != data_size) {
3431 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003432 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003433 }
3434 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003435 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003436 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003437 if (err != want) {
3438 if (err >= 0)
3439 err = -EIO;
3440 return err;
3441 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003442
3443 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3444
3445 c->word_offset += num_words;
3446 c->bit_offset = c->word_offset * BITS_PER_LONG;
3447 if (c->bit_offset > c->bm_bits)
3448 c->bit_offset = c->bm_bits;
3449
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003450 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003451}
3452
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003453/**
3454 * recv_bm_rle_bits
3455 *
3456 * Return 0 when done, 1 when another iteration is needed, and a negative error
3457 * code upon failure.
3458 */
3459static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003460recv_bm_rle_bits(struct drbd_conf *mdev,
3461 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003462 struct bm_xfer_ctx *c,
3463 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003464{
3465 struct bitstream bs;
3466 u64 look_ahead;
3467 u64 rl;
3468 u64 tmp;
3469 unsigned long s = c->bit_offset;
3470 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003471 int toggle = DCBP_get_start(p);
3472 int have;
3473 int bits;
3474
3475 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3476
3477 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3478 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003479 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003480
3481 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3482 bits = vli_decode_bits(&rl, look_ahead);
3483 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003484 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003485
3486 if (toggle) {
3487 e = s + rl -1;
3488 if (e >= c->bm_bits) {
3489 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003490 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003491 }
3492 _drbd_bm_set_bits(mdev, s, e);
3493 }
3494
3495 if (have < bits) {
3496 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3497 have, bits, look_ahead,
3498 (unsigned int)(bs.cur.b - p->code),
3499 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003500 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003501 }
3502 look_ahead >>= bits;
3503 have -= bits;
3504
3505 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3506 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003507 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003508 look_ahead |= tmp << have;
3509 have += bits;
3510 }
3511
3512 c->bit_offset = s;
3513 bm_xfer_ctx_bit_to_word_offset(c);
3514
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003515 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003516}
3517
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003518/**
3519 * decode_bitmap_c
3520 *
3521 * Return 0 when done, 1 when another iteration is needed, and a negative error
3522 * code upon failure.
3523 */
3524static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525decode_bitmap_c(struct drbd_conf *mdev,
3526 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003527 struct bm_xfer_ctx *c,
3528 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529{
3530 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003531 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003532
3533 /* other variants had been implemented for evaluation,
3534 * but have been dropped as this one turned out to be "best"
3535 * during all our tests. */
3536
3537 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3538 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003539 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003540}
3541
3542void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3543 const char *direction, struct bm_xfer_ctx *c)
3544{
3545 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003546 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003547 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3548 + c->bm_words * sizeof(long);
3549 unsigned total = c->bytes[0] + c->bytes[1];
3550 unsigned r;
3551
3552 /* total can not be zero. but just in case: */
3553 if (total == 0)
3554 return;
3555
3556 /* don't report if not compressed */
3557 if (total >= plain)
3558 return;
3559
3560 /* total < plain. check for overflow, still */
3561 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3562 : (1000 * total / plain);
3563
3564 if (r > 1000)
3565 r = 1000;
3566
3567 r = 1000 - r;
3568 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3569 "total %u; compression: %u.%u%%\n",
3570 direction,
3571 c->bytes[1], c->packets[1],
3572 c->bytes[0], c->packets[0],
3573 total, r/10, r % 10);
3574}
3575
3576/* Since we are processing the bitfield from lower addresses to higher,
3577 it does not matter if the process it in 32 bit chunks or 64 bit
3578 chunks as long as it is little endian. (Understand it as byte stream,
3579 beginning with the lowest byte...) If we would use big endian
3580 we would need to process it from the highest address to the lowest,
3581 in order to be agnostic to the 32 vs 64 bits issue.
3582
3583 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003584static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3585 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003586{
3587 struct bm_xfer_ctx c;
3588 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003589 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003590 int ok = false;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003591 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003592 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003593
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003594 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3595 /* you are supposed to send additional out-of-sync information
3596 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003597
3598 /* maybe we should use some per thread scratch page,
3599 * and allocate that during initial device creation? */
3600 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3601 if (!buffer) {
3602 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3603 goto out;
3604 }
3605
3606 c = (struct bm_xfer_ctx) {
3607 .bm_bits = drbd_bm_bits(mdev),
3608 .bm_words = drbd_bm_words(mdev),
3609 };
3610
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003611 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003612 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003613 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003614 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003615 /* MAYBE: sanity check that we speak proto >= 90,
3616 * and the feature is enabled! */
3617 struct p_compressed_bm *p;
3618
Philipp Reisner02918be2010-08-20 14:35:10 +02003619 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003620 dev_err(DEV, "ReportCBitmap packet too large\n");
3621 goto out;
3622 }
3623 /* use the page buff */
3624 p = buffer;
3625 memcpy(p, h, sizeof(*h));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003626 if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003627 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003628 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3629 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003630 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003631 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003632 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003633 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003634 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003635 goto out;
3636 }
3637
Philipp Reisner02918be2010-08-20 14:35:10 +02003638 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003639 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003640
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003641 if (err <= 0) {
3642 if (err < 0)
3643 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003644 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003645 }
Philipp Reisner9ba7aa02011-02-07 17:32:41 +01003646 if (!drbd_recv_header(mdev->tconn, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003647 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003648 cmd = pi.cmd;
3649 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003650 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003651
3652 INFO_bm_xfer_stats(mdev, "receive", &c);
3653
3654 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003655 enum drbd_state_rv rv;
3656
Philipp Reisnerb411b362009-09-25 16:07:19 -07003657 ok = !drbd_send_bitmap(mdev);
3658 if (!ok)
3659 goto out;
3660 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003661 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3662 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003663 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3664 /* admin may have requested C_DISCONNECTING,
3665 * other threads may have noticed network errors */
3666 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3667 drbd_conn_str(mdev->state.conn));
3668 }
3669
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003670 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003671 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003672 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003673 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3674 drbd_start_resync(mdev, C_SYNC_SOURCE);
3675 free_page((unsigned long) buffer);
3676 return ok;
3677}
3678
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003679static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3680 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003681{
3682 /* TODO zero copy sink :) */
3683 static char sink[128];
3684 int size, want, r;
3685
Philipp Reisner02918be2010-08-20 14:35:10 +02003686 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3687 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003688
Philipp Reisner02918be2010-08-20 14:35:10 +02003689 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003690 while (size > 0) {
3691 want = min_t(int, size, sizeof(sink));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003692 r = drbd_recv(mdev->tconn, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003693 if (!expect(r > 0))
3694 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003695 size -= r;
3696 }
3697 return size == 0;
3698}
3699
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003700static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3701 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003702{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003703 /* Make sure we've acked all the TCP data associated
3704 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003705 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003706
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003707 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003708}
3709
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003710static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3711 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003712{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003713 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003714
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003715 switch (mdev->state.conn) {
3716 case C_WF_SYNC_UUID:
3717 case C_WF_BITMAP_T:
3718 case C_BEHIND:
3719 break;
3720 default:
3721 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3722 drbd_conn_str(mdev->state.conn));
3723 }
3724
Philipp Reisner73a01a12010-10-27 14:33:00 +02003725 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3726
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003727 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003728}
3729
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003730typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
3731 unsigned int to_receive);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003732
Philipp Reisner02918be2010-08-20 14:35:10 +02003733struct data_cmd {
3734 int expect_payload;
3735 size_t pkt_size;
3736 drbd_cmd_handler_f function;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003737};
3738
Philipp Reisner02918be2010-08-20 14:35:10 +02003739static struct data_cmd drbd_cmd_handler[] = {
3740 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3741 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3742 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3743 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Philipp Reisner257d0af2011-01-26 12:15:29 +01003744 [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3745 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3746 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02003747 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3748 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner257d0af2011-01-26 12:15:29 +01003749 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam },
3750 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02003751 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3752 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3753 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3754 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3755 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3756 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3757 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3758 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3759 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3760 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02003761 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Philipp Reisner02918be2010-08-20 14:35:10 +02003762 /* anything missing from this table is in
3763 * the asender_tbl, see get_asender_cmd */
3764 [P_MAX_CMD] = { 0, 0, NULL },
3765};
3766
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
   p_header, but they may not rely on that. Since there is also p_header95 !
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003773
3774static void drbdd(struct drbd_conf *mdev)
3775{
Philipp Reisnerc0129492011-01-19 16:58:16 +01003776 struct p_header *header = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003777 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003778 size_t shs; /* sub header size */
3779 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003781 while (get_t_state(&mdev->tconn->receiver) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01003782 drbd_thread_current_set_cpu(&mdev->tconn->receiver);
Philipp Reisner9ba7aa02011-02-07 17:32:41 +01003783 if (!drbd_recv_header(mdev->tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003784 goto err_out;
3785
Philipp Reisner77351055b2011-02-07 17:24:26 +01003786 if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) {
3787 dev_err(DEV, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003788 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003789 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003790
Philipp Reisner77351055b2011-02-07 17:24:26 +01003791 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3792 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
3793 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003794 goto err_out;
3795 }
3796
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003797 if (shs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003798 rv = drbd_recv(mdev->tconn, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003799 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003800 if (!signal_pending(current))
3801 dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003802 goto err_out;
3803 }
3804 }
3805
Philipp Reisner77351055b2011-02-07 17:24:26 +01003806 rv = drbd_cmd_handler[pi.cmd].function(mdev, pi.cmd, pi.size - shs);
Philipp Reisner02918be2010-08-20 14:35:10 +02003807
3808 if (unlikely(!rv)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003809 dev_err(DEV, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003810 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003811 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003812 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003813 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003814
Philipp Reisner02918be2010-08-20 14:35:10 +02003815 if (0) {
3816 err_out:
3817 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003818 }
Lars Ellenberg856c50c2010-10-14 13:37:40 +02003819 /* If we leave here, we probably want to update at least the
3820 * "Connected" indicator on stable storage. Do so explicitly here. */
3821 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003822}
3823
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003824void drbd_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825{
3826 struct drbd_wq_barrier barr;
3827
3828 barr.w.cb = w_prev_work_done;
3829 init_completion(&barr.done);
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003830 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003831 wait_for_completion(&barr.done);
3832}
3833
3834static void drbd_disconnect(struct drbd_conf *mdev)
3835{
3836 enum drbd_fencing_p fp;
3837 union drbd_state os, ns;
3838 int rv = SS_UNKNOWN_ERROR;
3839 unsigned int i;
3840
3841 if (mdev->state.conn == C_STANDALONE)
3842 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003843
3844 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003845 drbd_thread_stop(&mdev->tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003846 drbd_free_sock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003847
Philipp Reisner85719572010-07-21 10:20:17 +02003848 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003849 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003850 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3851 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
3852 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003853 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003854
3855 /* We do not have data structures that would allow us to
3856 * get the rs_pending_cnt down to 0 again.
3857 * * On C_SYNC_TARGET we do not have any data structures describing
3858 * the pending RSDataRequest's we have sent.
3859 * * On C_SYNC_SOURCE there is no data structure that tracks
3860 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
3861 * And no, it is not the sum of the reference counts in the
3862 * resync_LRU. The resync_LRU tracks the whole operation including
3863 * the disk-IO, while the rs_pending_cnt only tracks the blocks
3864 * on the fly. */
3865 drbd_rs_cancel_all(mdev);
3866 mdev->rs_total = 0;
3867 mdev->rs_failed = 0;
3868 atomic_set(&mdev->rs_pending_cnt, 0);
3869 wake_up(&mdev->misc_wait);
3870
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003871 del_timer(&mdev->request_timer);
3872
Philipp Reisnerb411b362009-09-25 16:07:19 -07003873 /* make sure syncer is stopped and w_resume_next_sg queued */
3874 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003875 resync_timer_fn((unsigned long)mdev);
3876
Philipp Reisnerb411b362009-09-25 16:07:19 -07003877 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
3878 * w_make_resync_request etc. which may still be on the worker queue
3879 * to be "canceled" */
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003880 drbd_flush_workqueue(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003881
3882 /* This also does reclaim_net_ee(). If we do this too early, we might
3883 * miss some resync ee and pages.*/
3884 drbd_process_done_ee(mdev);
3885
3886 kfree(mdev->p_uuid);
3887 mdev->p_uuid = NULL;
3888
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003889 if (!is_susp(mdev->state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003890 tl_clear(mdev);
3891
Philipp Reisnerb411b362009-09-25 16:07:19 -07003892 dev_info(DEV, "Connection closed\n");
3893
3894 drbd_md_sync(mdev);
3895
3896 fp = FP_DONT_CARE;
3897 if (get_ldev(mdev)) {
3898 fp = mdev->ldev->dc.fencing;
3899 put_ldev(mdev);
3900 }
3901
Philipp Reisner87f7be42010-06-11 13:56:33 +02003902 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3903 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003904
Philipp Reisner87eeee42011-01-19 14:16:30 +01003905 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003906 os = mdev->state;
3907 if (os.conn >= C_UNCONNECTED) {
3908 /* Do not restart in case we are C_DISCONNECTING */
3909 ns = os;
3910 ns.conn = C_UNCONNECTED;
3911 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
3912 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01003913 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003914
3915 if (os.conn == C_DISCONNECTING) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01003916 wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003917
Philipp Reisnera0638452011-01-19 14:31:32 +01003918 crypto_free_hash(mdev->tconn->cram_hmac_tfm);
3919 mdev->tconn->cram_hmac_tfm = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003920
Philipp Reisner89e58e72011-01-19 13:12:45 +01003921 kfree(mdev->tconn->net_conf);
3922 mdev->tconn->net_conf = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003923 drbd_request_state(mdev, NS(conn, C_STANDALONE));
3924 }
3925
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003926 /* serialize with bitmap writeout triggered by the state change,
3927 * if any. */
3928 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3929
Philipp Reisnerb411b362009-09-25 16:07:19 -07003930 /* tcp_close and release of sendpage pages can be deferred. I don't
3931 * want to use SO_LINGER, because apparently it can be deferred for
3932 * more than 20 seconds (longest time I checked).
3933 *
3934 * Actually we don't care for exactly when the network stack does its
3935 * put_page(), but release our reference on these pages right here.
3936 */
3937 i = drbd_release_ee(mdev, &mdev->net_ee);
3938 if (i)
3939 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003940 i = atomic_read(&mdev->pp_in_use_by_net);
3941 if (i)
3942 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003943 i = atomic_read(&mdev->pp_in_use);
3944 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02003945 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003946
3947 D_ASSERT(list_empty(&mdev->read_ee));
3948 D_ASSERT(list_empty(&mdev->active_ee));
3949 D_ASSERT(list_empty(&mdev->sync_ee));
3950 D_ASSERT(list_empty(&mdev->done_ee));
3951
3952 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
3953 atomic_set(&mdev->current_epoch->epoch_size, 0);
3954 D_ASSERT(list_empty(&mdev->current_epoch->list));
3955}
3956
3957/*
3958 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
3959 * we can agree on is stored in agreed_pro_version.
3960 *
3961 * feature flags and the reserved array should be enough room for future
3962 * enhancements of the handshake protocol, and possible plugins...
3963 *
3964 * for now, they are expected to be zero, but ignored.
3965 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003966static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003967{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003968 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003969 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003970 int ok;
3971
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003972 if (mutex_lock_interruptible(&tconn->data.mutex)) {
3973 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003974 return 0; /* interrupted. not ok. */
3975 }
3976
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003977 if (tconn->data.socket == NULL) {
3978 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003979 return 0;
3980 }
3981
3982 memset(p, 0, sizeof(*p));
3983 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3984 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003985 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
3986 &p->head, sizeof(*p), 0);
3987 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003988 return ok;
3989}
3990
3991/*
3992 * return values:
3993 * 1 yes, we have a valid connection
3994 * 0 oops, did not work out, please try again
3995 * -1 peer talks different language,
3996 * no point in trying again, please go standalone.
3997 */
Philipp Reisner65d11ed2011-02-07 17:35:59 +01003998static int drbd_do_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003999{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004000 /* ASSERT current == tconn->receiver ... */
4001 struct p_handshake *p = &tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02004002 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004003 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004004 int rv;
4005
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004006 rv = drbd_send_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004007 if (!rv)
4008 return 0;
4009
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004010 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004011 if (!rv)
4012 return 0;
4013
Philipp Reisner77351055b2011-02-07 17:24:26 +01004014 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004015 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004016 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004017 return -1;
4018 }
4019
Philipp Reisner77351055b2011-02-07 17:24:26 +01004020 if (pi.size != expect) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004021 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004022 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004023 return -1;
4024 }
4025
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004026 rv = drbd_recv(tconn, &p->head.payload, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004027
4028 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004029 if (!signal_pending(current))
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004030 conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004031 return 0;
4032 }
4033
Philipp Reisnerb411b362009-09-25 16:07:19 -07004034 p->protocol_min = be32_to_cpu(p->protocol_min);
4035 p->protocol_max = be32_to_cpu(p->protocol_max);
4036 if (p->protocol_max == 0)
4037 p->protocol_max = p->protocol_min;
4038
4039 if (PRO_VERSION_MAX < p->protocol_min ||
4040 PRO_VERSION_MIN > p->protocol_max)
4041 goto incompat;
4042
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004043 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004045 conn_info(tconn, "Handshake successful: "
4046 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004047
4048 return 1;
4049
4050 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004051 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004052 "I support %d-%d, peer supports %d-%d\n",
4053 PRO_VERSION_MIN, PRO_VERSION_MAX,
4054 p->protocol_min, p->protocol_max);
4055 return -1;
4056}
4057
4058#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Stub used when the kernel lacks HMAC support: authentication cannot be
 * performed, so log why and return -1 (auth failed, don't try again).
 *
 * Only a connection is in scope here, no device, so the connection-level
 * logging helper has to be used.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4065#else
4066#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004067
4068/* Return value:
4069 1 - auth succeeded,
4070 0 - failed, try again (network error),
4071 -1 - auth failed, don't try again.
4072*/
4073
Philipp Reisner13e60372011-02-08 09:54:40 +01004074static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004075{
4076 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4077 struct scatterlist sg;
4078 char *response = NULL;
4079 char *right_response = NULL;
4080 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004081 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004082 unsigned int resp_size;
4083 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004084 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004085 int rv;
4086
Philipp Reisner13e60372011-02-08 09:54:40 +01004087 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004088 desc.flags = 0;
4089
Philipp Reisner13e60372011-02-08 09:54:40 +01004090 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4091 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004092 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004093 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004094 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004095 goto fail;
4096 }
4097
4098 get_random_bytes(my_challenge, CHALLENGE_LEN);
4099
Philipp Reisner13e60372011-02-08 09:54:40 +01004100 rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004101 if (!rv)
4102 goto fail;
4103
Philipp Reisner13e60372011-02-08 09:54:40 +01004104 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004105 if (!rv)
4106 goto fail;
4107
Philipp Reisner77351055b2011-02-07 17:24:26 +01004108 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004109 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004110 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004111 rv = 0;
4112 goto fail;
4113 }
4114
Philipp Reisner77351055b2011-02-07 17:24:26 +01004115 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004116 conn_err(tconn, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004117 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004118 goto fail;
4119 }
4120
Philipp Reisner77351055b2011-02-07 17:24:26 +01004121 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004122 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004123 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004124 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004125 goto fail;
4126 }
4127
Philipp Reisner13e60372011-02-08 09:54:40 +01004128 rv = drbd_recv(tconn, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129
Philipp Reisner77351055b2011-02-07 17:24:26 +01004130 if (rv != pi.size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004131 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004132 conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004133 rv = 0;
4134 goto fail;
4135 }
4136
Philipp Reisner13e60372011-02-08 09:54:40 +01004137 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004138 response = kmalloc(resp_size, GFP_NOIO);
4139 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004140 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004141 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004142 goto fail;
4143 }
4144
4145 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004146 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004147
4148 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4149 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004150 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004151 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004152 goto fail;
4153 }
4154
Philipp Reisner13e60372011-02-08 09:54:40 +01004155 rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004156 if (!rv)
4157 goto fail;
4158
Philipp Reisner13e60372011-02-08 09:54:40 +01004159 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004160 if (!rv)
4161 goto fail;
4162
Philipp Reisner77351055b2011-02-07 17:24:26 +01004163 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004164 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004165 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004166 rv = 0;
4167 goto fail;
4168 }
4169
Philipp Reisner77351055b2011-02-07 17:24:26 +01004170 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004171 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004172 rv = 0;
4173 goto fail;
4174 }
4175
Philipp Reisner13e60372011-02-08 09:54:40 +01004176 rv = drbd_recv(tconn, response , resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004177
4178 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004179 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004180 conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004181 rv = 0;
4182 goto fail;
4183 }
4184
4185 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004186 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004187 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004188 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004189 goto fail;
4190 }
4191
4192 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4193
4194 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4195 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004196 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004197 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004198 goto fail;
4199 }
4200
4201 rv = !memcmp(response, right_response, resp_size);
4202
4203 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004204 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4205 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004206 else
4207 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004208
4209 fail:
4210 kfree(peers_ch);
4211 kfree(response);
4212 kfree(right_response);
4213
4214 return rv;
4215}
4216#endif
4217
4218int drbdd_init(struct drbd_thread *thi)
4219{
4220 struct drbd_conf *mdev = thi->mdev;
4221 unsigned int minor = mdev_to_minor(mdev);
4222 int h;
4223
4224 sprintf(current->comm, "drbd%d_receiver", minor);
4225
4226 dev_info(DEV, "receiver (re)started\n");
4227
4228 do {
Philipp Reisner907599e2011-02-08 11:25:37 +01004229 h = drbd_connect(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004230 if (h == 0) {
4231 drbd_disconnect(mdev);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004232 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004233 }
4234 if (h == -1) {
4235 dev_warn(DEV, "Discarding network configuration.\n");
4236 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
4237 }
4238 } while (h == 0);
4239
4240 if (h > 0) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004241 if (get_net_conf(mdev->tconn)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004242 drbdd(mdev);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004243 put_net_conf(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004244 }
4245 }
4246
4247 drbd_disconnect(mdev);
4248
4249 dev_info(DEV, "receiver terminated\n");
4250 return 0;
4251}
4252
4253/* ********* acknowledge sender ******** */
4254
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004255static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004256{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004257 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004258
4259 int retcode = be32_to_cpu(p->retcode);
4260
4261 if (retcode >= SS_SUCCESS) {
4262 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4263 } else {
4264 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4265 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4266 drbd_set_st_err_str(retcode), retcode);
4267 }
4268 wake_up(&mdev->state_wait);
4269
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004270 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004271}
4272
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004273static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004274{
4275 return drbd_send_ping_ack(mdev);
4276
4277}
4278
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004279static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004280{
4281 /* restore idle timeout */
Philipp Reisnere42325a2011-01-19 13:55:45 +01004282 mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;
Philipp Reisner309d1602010-03-02 15:03:44 +01004283 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
4284 wake_up(&mdev->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004286 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004287}
4288
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004289static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004290{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004291 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004292 sector_t sector = be64_to_cpu(p->sector);
4293 int blksize = be32_to_cpu(p->blksize);
4294
Philipp Reisner31890f42011-01-19 14:12:51 +01004295 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296
4297 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4298
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004299 if (get_ldev(mdev)) {
4300 drbd_rs_complete_io(mdev, sector);
4301 drbd_set_in_sync(mdev, sector, blksize);
4302 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4303 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4304 put_ldev(mdev);
4305 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004306 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004307 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004308
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004309 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310}
4311
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004312static int
4313validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4314 struct rb_root *root, const char *func,
4315 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004316{
4317 struct drbd_request *req;
4318 struct bio_and_error m;
4319
Philipp Reisner87eeee42011-01-19 14:16:30 +01004320 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004321 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004323 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004324 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004325 }
4326 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004327 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004328
4329 if (m.bio)
4330 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004331 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004332}
4333
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004334static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004335{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004336 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004337 sector_t sector = be64_to_cpu(p->sector);
4338 int blksize = be32_to_cpu(p->blksize);
4339 enum drbd_req_event what;
4340
4341 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4342
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004343 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004344 drbd_set_in_sync(mdev, sector, blksize);
4345 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004346 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004347 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004348 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004349 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004350 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004351 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004352 break;
4353 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004354 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004355 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356 break;
4357 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004358 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004359 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004360 break;
4361 case P_DISCARD_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004362 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004363 what = CONFLICT_DISCARDED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364 break;
4365 default:
4366 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004367 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004368 }
4369
4370 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004371 &mdev->write_requests, __func__,
4372 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004373}
4374
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004375static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004376{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004377 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004378 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004379 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004380 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4381 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004382 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004383
4384 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4385
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004386 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004387 dec_rs_pending(mdev);
4388 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004389 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004390 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004391
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004392 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004393 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004394 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004395 if (!found) {
4396 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4397 The master bio might already be completed, therefore the
4398 request is no longer in the collision hash. */
4399 /* In Protocol B we might already have got a P_RECV_ACK
4400 but then get a P_NEG_ACK afterwards. */
4401 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004402 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004403 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004404 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004405 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406}
4407
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004408static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004409{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004410 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004411 sector_t sector = be64_to_cpu(p->sector);
4412
4413 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4414 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4415 (unsigned long long)sector, be32_to_cpu(p->blksize));
4416
4417 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004418 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004419 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004420}
4421
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004422static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004423{
4424 sector_t sector;
4425 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004426 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004427
4428 sector = be64_to_cpu(p->sector);
4429 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004430
4431 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4432
4433 dec_rs_pending(mdev);
4434
4435 if (get_ldev_if_state(mdev, D_FAILED)) {
4436 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004437 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004438 case P_NEG_RS_DREPLY:
4439 drbd_rs_failed_io(mdev, sector, size);
4440 case P_RS_CANCEL:
4441 break;
4442 default:
4443 D_ASSERT(0);
4444 put_ldev(mdev);
4445 return false;
4446 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004447 put_ldev(mdev);
4448 }
4449
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004450 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004451}
4452
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004453static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004454{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004455 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004456
4457 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
4458
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004459 if (mdev->state.conn == C_AHEAD &&
4460 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004461 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4462 mdev->start_resync_timer.expires = jiffies + HZ;
4463 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004464 }
4465
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004466 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004467}
4468
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004469static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004470{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004471 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004472 struct drbd_work *w;
4473 sector_t sector;
4474 int size;
4475
4476 sector = be64_to_cpu(p->sector);
4477 size = be32_to_cpu(p->blksize);
4478
4479 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4480
4481 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4482 drbd_ov_oos_found(mdev, sector, size);
4483 else
4484 ov_oos_print(mdev);
4485
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004486 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004487 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004488
Philipp Reisnerb411b362009-09-25 16:07:19 -07004489 drbd_rs_complete_io(mdev, sector);
4490 dec_rs_pending(mdev);
4491
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004492 --mdev->ov_left;
4493
4494 /* let's advance progress step marks only for every other megabyte */
4495 if ((mdev->ov_left & 0x200) == 0x200)
4496 drbd_advance_rs_marks(mdev, mdev->ov_left);
4497
4498 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004499 w = kmalloc(sizeof(*w), GFP_NOIO);
4500 if (w) {
4501 w->cb = w_ov_finished;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004502 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004503 } else {
4504 dev_err(DEV, "kmalloc(w) failed.");
4505 ov_oos_print(mdev);
4506 drbd_resync_finished(mdev);
4507 }
4508 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004509 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004510 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004511}
4512
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004513static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004514{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004515 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004516}
4517
Philipp Reisnerb411b362009-09-25 16:07:19 -07004518struct asender_cmd {
4519 size_t pkt_size;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004520 int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004521};
4522
4523static struct asender_cmd *get_asender_cmd(int cmd)
4524{
4525 static struct asender_cmd asender_tbl[] = {
4526 /* anything missing from this table is in
4527 * the drbd_cmd_handler (drbd_default_handler) table,
4528 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004529 [P_PING] = { sizeof(struct p_header), got_Ping },
4530 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004531 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4532 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4533 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4534 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4535 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4536 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4537 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4538 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4539 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4540 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4541 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004542 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004543 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerb411b362009-09-25 16:07:19 -07004544 [P_MAX_CMD] = { 0, NULL },
4545 };
4546 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4547 return NULL;
4548 return &asender_tbl[cmd];
4549}
4550
4551int drbd_asender(struct drbd_thread *thi)
4552{
4553 struct drbd_conf *mdev = thi->mdev;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004554 struct p_header *h = &mdev->tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004555 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004556 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004557
Philipp Reisner257d0af2011-01-26 12:15:29 +01004558 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004559 void *buf = h;
4560 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004561 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004562 int ping_timeout_active = 0;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004563 int empty;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004564
4565 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
4566
4567 current->policy = SCHED_RR; /* Make this a realtime task! */
4568 current->rt_priority = 2; /* more important than all other tasks */
4569
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004570 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004571 drbd_thread_current_set_cpu(thi);
Philipp Reisnere43ef192011-02-07 14:40:40 +01004572 if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) {
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004573 if (!drbd_send_ping(mdev)) {
4574 dev_err(DEV, "drbd_send_ping has failed\n");
4575 goto reconnect;
4576 }
Philipp Reisnere42325a2011-01-19 13:55:45 +01004577 mdev->tconn->meta.socket->sk->sk_rcvtimeo =
Philipp Reisner89e58e72011-01-19 13:12:45 +01004578 mdev->tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004579 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004580 }
4581
4582 /* conditionally cork;
4583 * it may hurt latency if we cork without much to send */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004584 if (!mdev->tconn->net_conf->no_cork &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004585 3 < atomic_read(&mdev->unacked_cnt))
Philipp Reisnere42325a2011-01-19 13:55:45 +01004586 drbd_tcp_cork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587 while (1) {
Philipp Reisner808e37b2011-02-07 14:44:14 +01004588 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004589 flush_signals(current);
Lars Ellenberg0f8488e2010-10-13 18:19:23 +02004590 if (!drbd_process_done_ee(mdev))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004591 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004592 /* to avoid race with newly queued ACKs */
Philipp Reisner808e37b2011-02-07 14:44:14 +01004593 set_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004594 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004595 empty = list_empty(&mdev->done_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004596 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004597 /* new ack may have been queued right here,
4598 * but then there is also a signal pending,
4599 * and we start over... */
4600 if (empty)
4601 break;
4602 }
4603 /* but unconditionally uncork unless disabled */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004604 if (!mdev->tconn->net_conf->no_cork)
Philipp Reisnere42325a2011-01-19 13:55:45 +01004605 drbd_tcp_uncork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004606
4607 /* short circuit, recv_msg would return EINTR anyways. */
4608 if (signal_pending(current))
4609 continue;
4610
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +01004611 rv = drbd_recv_short(mdev->tconn->meta.socket, buf, expect-received, 0);
Philipp Reisner808e37b2011-02-07 14:44:14 +01004612 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004613
4614 flush_signals(current);
4615
4616 /* Note:
4617 * -EINTR (on meta) we got a signal
4618 * -EAGAIN (on meta) rcvtimeo expired
4619 * -ECONNRESET other side closed the connection
4620 * -ERESTARTSYS (on data) we got a signal
4621 * rv < 0 other than above: unexpected error!
4622 * rv == expected: full header or command
4623 * rv < expected: "woken" by signal during receive
4624 * rv == 0 : "connection shut down by peer"
4625 */
4626 if (likely(rv > 0)) {
4627 received += rv;
4628 buf += rv;
4629 } else if (rv == 0) {
4630 dev_err(DEV, "meta connection shut down by peer.\n");
4631 goto reconnect;
4632 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004633 /* If the data socket received something meanwhile,
4634 * that is good enough: peer is still alive. */
Philipp Reisner31890f42011-01-19 14:12:51 +01004635 if (time_after(mdev->tconn->last_received,
Philipp Reisnere42325a2011-01-19 13:55:45 +01004636 jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004637 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004638 if (ping_timeout_active) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004639 dev_err(DEV, "PingAck did not arrive in time.\n");
4640 goto reconnect;
4641 }
Philipp Reisnere43ef192011-02-07 14:40:40 +01004642 set_bit(SEND_PING, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004643 continue;
4644 } else if (rv == -EINTR) {
4645 continue;
4646 } else {
4647 dev_err(DEV, "sock_recvmsg returned %d\n", rv);
4648 goto reconnect;
4649 }
4650
4651 if (received == expect && cmd == NULL) {
Philipp Reisnerce243852011-02-07 17:27:47 +01004652 if (!decode_header(mdev->tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004653 goto reconnect;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004654 cmd = get_asender_cmd(pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004655 if (unlikely(cmd == NULL)) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01004656 dev_err(DEV, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004657 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004658 goto disconnect;
4659 }
4660 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004661 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01004662 dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004663 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004664 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004665 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004666 }
4667 if (received == expect) {
Philipp Reisner31890f42011-01-19 14:12:51 +01004668 mdev->tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004669 D_ASSERT(cmd != NULL);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004670 if (!cmd->process(mdev, pi.cmd))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004671 goto reconnect;
4672
Lars Ellenbergf36af182011-03-09 22:44:55 +01004673 /* the idle_timeout (ping-int)
4674 * has been restored in got_PingAck() */
4675 if (cmd == get_asender_cmd(P_PING_ACK))
4676 ping_timeout_active = 0;
4677
Philipp Reisnerb411b362009-09-25 16:07:19 -07004678 buf = h;
4679 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004680 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004681 cmd = NULL;
4682 }
4683 }
4684
4685 if (0) {
4686reconnect:
4687 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004688 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004689 }
4690 if (0) {
4691disconnect:
4692 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004693 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004694 }
Philipp Reisner808e37b2011-02-07 14:44:14 +01004695 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004696
4697 D_ASSERT(mdev->state.conn < C_CONNECTED);
4698 dev_info(DEV, "asender terminated\n");
4699
4700 return 0;
4701}