blob: 24907877cae4330ecd6bf94ff05227b1fdbbb95c [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020047#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070048#include "drbd_req.h"
49
50#include "drbd_vli.h"
51
Philipp Reisner77351055b2011-02-07 17:24:26 +010052struct packet_info {
53 enum drbd_packet cmd;
Andreas Gruenbachere2857212011-03-25 00:57:38 +010054 unsigned int size;
55 unsigned int vnr;
Andreas Gruenbachere6589832011-03-30 12:54:42 +020056 void *data;
Philipp Reisner77351055b2011-02-07 17:24:26 +010057};
58
/*
 * Return type of drbd_may_finish_epoch().
 * NOTE(review): the meanings below are inferred from the enumerator
 * names -- confirm against drbd_may_finish_epoch().
 */
enum finish_epoch {
	FE_STILL_LIVE,	/* presumably: the epoch is still in use */
	FE_DESTROYED,	/* presumably: the epoch was freed */
	FE_RECYCLED,	/* presumably: the epoch was reset and reused */
};
64
/* Forward declarations of functions defined later in this file. */
static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_device *device);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev);
static int e_end_block(struct drbd_work *w, int unused);


/* Allocation flags for the opportunistic page allocations done by
 * __drbd_alloc_pages(): highmem pages are acceptable, and the allocator
 * should not warn on failure. */
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

74
Lars Ellenberg45bb9122010-05-14 17:10:48 +020075/*
76 * some helper functions to deal with single linked page lists,
77 * page->private being our "next" pointer.
78 */
79
80/* If at least n pages are linked at head, get n pages off.
81 * Otherwise, don't modify head, and return NULL.
82 * Locking is the responsibility of the caller.
83 */
84static struct page *page_chain_del(struct page **head, int n)
85{
86 struct page *page;
87 struct page *tmp;
88
89 BUG_ON(!n);
90 BUG_ON(!head);
91
92 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020093
94 if (!page)
95 return NULL;
96
Lars Ellenberg45bb9122010-05-14 17:10:48 +020097 while (page) {
98 tmp = page_chain_next(page);
99 if (--n == 0)
100 break; /* found sufficient pages */
101 if (tmp == NULL)
102 /* insufficient pages, don't use any of them. */
103 return NULL;
104 page = tmp;
105 }
106
107 /* add end of list marker for the returned list */
108 set_page_private(page, 0);
109 /* actual return value, and adjustment of head */
110 page = *head;
111 *head = tmp;
112 return page;
113}
114
115/* may be used outside of locks to find the tail of a (usually short)
116 * "private" page chain, before adding it back to a global chain head
117 * with page_chain_add() under a spinlock. */
118static struct page *page_chain_tail(struct page *page, int *len)
119{
120 struct page *tmp;
121 int i = 1;
122 while ((tmp = page_chain_next(page)))
123 ++i, page = tmp;
124 if (len)
125 *len = i;
126 return page;
127}
128
129static int page_chain_free(struct page *page)
130{
131 struct page *tmp;
132 int i = 0;
133 page_chain_for_each_safe(page, tmp) {
134 put_page(page);
135 ++i;
136 }
137 return i;
138}
139
140static void page_chain_add(struct page **head,
141 struct page *chain_first, struct page *chain_last)
142{
143#if 1
144 struct page *tmp;
145 tmp = page_chain_tail(chain_first, NULL);
146 BUG_ON(tmp != chain_last);
147#endif
148
149 /* add chain to head */
150 set_page_private(chain_last, (unsigned long)*head);
151 *head = chain_first;
152}
153
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200154static struct page *__drbd_alloc_pages(struct drbd_device *device,
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200155 unsigned int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700156{
157 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200158 struct page *tmp = NULL;
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200159 unsigned int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700160
161 /* Yes, testing drbd_pp_vacant outside the lock is racy.
162 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200163 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700164 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200165 page = page_chain_del(&drbd_pp_pool, number);
166 if (page)
167 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700168 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200169 if (page)
170 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700171 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200172
Philipp Reisnerb411b362009-09-25 16:07:19 -0700173 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
174 * "criss-cross" setup, that might cause write-out on some other DRBD,
175 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200176 for (i = 0; i < number; i++) {
177 tmp = alloc_page(GFP_TRY);
178 if (!tmp)
179 break;
180 set_page_private(tmp, (unsigned long)page);
181 page = tmp;
182 }
183
184 if (i == number)
185 return page;
186
187 /* Not enough pages immediately available this time.
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200188 * No need to jump around here, drbd_alloc_pages will retry this
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200189 * function "soon". */
190 if (page) {
191 tmp = page_chain_tail(page, NULL);
192 spin_lock(&drbd_pp_lock);
193 page_chain_add(&drbd_pp_pool, page, tmp);
194 drbd_pp_vacant += i;
195 spin_unlock(&drbd_pp_lock);
196 }
197 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700198}
199
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200200static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200201 struct list_head *to_be_freed)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700202{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100203 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700204 struct list_head *le, *tle;
205
206 /* The EEs are always appended to the end of the list. Since
207 they are sent in order over the wire, they have to finish
208 in order. As soon as we see the first not finished we can
209 stop to examine the list... */
210
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200211 list_for_each_safe(le, tle, &device->net_ee) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100212 peer_req = list_entry(le, struct drbd_peer_request, w.list);
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200213 if (drbd_peer_req_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700214 break;
215 list_move(le, to_be_freed);
216 }
217}
218
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200219static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700220{
221 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100222 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700223
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200224 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200225 reclaim_finished_net_peer_reqs(device, &reclaimed);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200226 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700227
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100228 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200229 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700230}
231
232/**
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200233 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200234 * @device: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200235 * @number: number of pages requested
236 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700237 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200238 * Tries to allocate number pages, first from our own page pool, then from
239 * the kernel, unless this allocation would exceed the max_buffers setting.
240 * Possibly retry until DRBD frees sufficient pages somewhere else.
241 *
242 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700243 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200244struct page *drbd_alloc_pages(struct drbd_device *device, unsigned int number,
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200245 bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700246{
247 struct page *page = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200248 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700249 DEFINE_WAIT(wait);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200250 int mxb;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700251
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200252 /* Yes, we may run up to @number over max_buffers. If we
253 * follow it strictly, the admin will get it wrong anyways. */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200254 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200255 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200256 mxb = nc ? nc->max_buffers : 1000000;
257 rcu_read_unlock();
258
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200259 if (atomic_read(&device->pp_in_use) < mxb)
260 page = __drbd_alloc_pages(device, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700261
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200262 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700263 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
264
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200265 drbd_kick_lo_and_reclaim_net(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700266
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200267 if (atomic_read(&device->pp_in_use) < mxb) {
268 page = __drbd_alloc_pages(device, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700269 if (page)
270 break;
271 }
272
273 if (!retry)
274 break;
275
276 if (signal_pending(current)) {
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200277 dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700278 break;
279 }
280
281 schedule();
282 }
283 finish_wait(&drbd_pp_wait, &wait);
284
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200285 if (page)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200286 atomic_add(number, &device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700287 return page;
288}
289
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200290/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200291 * Is also used from inside an other spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200292 * Either links the page chain back to the global pool,
293 * or returns all pages to the system. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200294static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700295{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200296 atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700297 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200298
Lars Ellenberga73ff322012-06-25 19:15:38 +0200299 if (page == NULL)
300 return;
301
Philipp Reisner81a5d602011-02-22 19:53:16 -0500302 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200303 i = page_chain_free(page);
304 else {
305 struct page *tmp;
306 tmp = page_chain_tail(page, &i);
307 spin_lock(&drbd_pp_lock);
308 page_chain_add(&drbd_pp_pool, page, tmp);
309 drbd_pp_vacant += i;
310 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700311 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200312 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200313 if (i < 0)
Lars Ellenberg435f0742010-09-06 12:30:25 +0200314 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
315 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700316 wake_up(&drbd_pp_wait);
317}
318
319/*
320You need to hold the req_lock:
321 _drbd_wait_ee_list_empty()
322
323You must not have the req_lock:
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200324 drbd_free_peer_req()
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200325 drbd_alloc_peer_req()
Andreas Gruenbacher7721f562011-04-06 17:14:02 +0200326 drbd_free_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700327 drbd_ee_fix_bhs()
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200328 drbd_finish_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700329 drbd_clear_done_ee()
330 drbd_wait_ee_list_empty()
331*/
332
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100333struct drbd_peer_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200334drbd_alloc_peer_req(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200335 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700336{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100337 struct drbd_peer_request *peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +0200338 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200339 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700340
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200341 if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700342 return NULL;
343
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100344 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
345 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700346 if (!(gfp_mask & __GFP_NOWARN))
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200347 dev_err(DEV, "%s: allocation failed\n", __func__);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700348 return NULL;
349 }
350
Lars Ellenberga73ff322012-06-25 19:15:38 +0200351 if (data_size) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200352 page = drbd_alloc_pages(device, nr_pages, (gfp_mask & __GFP_WAIT));
Lars Ellenberga73ff322012-06-25 19:15:38 +0200353 if (!page)
354 goto fail;
355 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700356
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100357 drbd_clear_interval(&peer_req->i);
358 peer_req->i.size = data_size;
359 peer_req->i.sector = sector;
360 peer_req->i.local = false;
361 peer_req->i.waiting = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700362
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100363 peer_req->epoch = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200364 peer_req->w.device = device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100365 peer_req->pages = page;
366 atomic_set(&peer_req->pending_bios, 0);
367 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100368 /*
369 * The block_id is opaque to the receiver. It is not endianness
370 * converted, and sent back to the sender unchanged.
371 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100372 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700373
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100374 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700375
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200376 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100377 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700378 return NULL;
379}
380
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200381void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100382 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700383{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100384 if (peer_req->flags & EE_HAS_DIGEST)
385 kfree(peer_req->digest);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200386 drbd_free_pages(device, peer_req->pages, is_net);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100387 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
388 D_ASSERT(drbd_interval_empty(&peer_req->i));
389 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700390}
391
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200392int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700393{
394 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100395 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700396 int count = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200397 int is_net = list == &device->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700398
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200399 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700400 list_splice_init(list, &work_list);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200401 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700402
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100403 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200404 __drbd_free_peer_req(device, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700405 count++;
406 }
407 return count;
408}
409
Philipp Reisnerb411b362009-09-25 16:07:19 -0700410/*
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200411 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700412 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200413static int drbd_finish_peer_reqs(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700414{
415 LIST_HEAD(work_list);
416 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100417 struct drbd_peer_request *peer_req, *t;
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100418 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700419
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200420 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200421 reclaim_finished_net_peer_reqs(device, &reclaimed);
422 list_splice_init(&device->done_ee, &work_list);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200423 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700424
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100425 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200426 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700427
428 /* possible callbacks here:
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +0200429 * e_end_block, and e_end_resync_block, e_send_superseded.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700430 * all ignore the last argument.
431 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100432 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100433 int err2;
434
Philipp Reisnerb411b362009-09-25 16:07:19 -0700435 /* list_del not necessary, next/prev members not touched */
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100436 err2 = peer_req->w.cb(&peer_req->w, !!err);
437 if (!err)
438 err = err2;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200439 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700440 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200441 wake_up(&device->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700442
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100443 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700444}
445
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200446static void _drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200447 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700448{
449 DEFINE_WAIT(wait);
450
451 /* avoids spin_lock/unlock
452 * and calling prepare_to_wait in the fast path */
453 while (!list_empty(head)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200454 prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200455 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100456 io_schedule();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200457 finish_wait(&device->ee_wait, &wait);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200458 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700459 }
460}
461
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200462static void drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200463 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700464{
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200465 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200466 _drbd_wait_ee_list_empty(device, head);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200467 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700468}
469
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100470static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700471{
472 mm_segment_t oldfs;
473 struct kvec iov = {
474 .iov_base = buf,
475 .iov_len = size,
476 };
477 struct msghdr msg = {
478 .msg_iovlen = 1,
479 .msg_iov = (struct iovec *)&iov,
480 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
481 };
482 int rv;
483
484 oldfs = get_fs();
485 set_fs(KERNEL_DS);
486 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
487 set_fs(oldfs);
488
489 return rv;
490}
491
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200492static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700493{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700494 int rv;
495
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200496 rv = drbd_recv_short(connection->data.socket, buf, size, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700497
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200498 if (rv < 0) {
499 if (rv == -ECONNRESET)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200500 conn_info(connection, "sock was reset by peer\n");
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200501 else if (rv != -ERESTARTSYS)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200502 conn_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200503 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200504 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200505 long t;
506 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200507 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200508 rcu_read_unlock();
509
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200510 t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200511
Philipp Reisner599377a2012-08-17 14:50:22 +0200512 if (t)
513 goto out;
514 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200515 conn_info(connection, "sock was shut down by peer\n");
Philipp Reisner599377a2012-08-17 14:50:22 +0200516 }
517
Philipp Reisnerb411b362009-09-25 16:07:19 -0700518 if (rv != size)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200519 conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700520
Philipp Reisner599377a2012-08-17 14:50:22 +0200521out:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700522 return rv;
523}
524
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200525static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100526{
527 int err;
528
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200529 err = drbd_recv(connection, buf, size);
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100530 if (err != size) {
531 if (err >= 0)
532 err = -EIO;
533 } else
534 err = 0;
535 return err;
536}
537
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200538static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100539{
540 int err;
541
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200542 err = drbd_recv_all(connection, buf, size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100543 if (err && !signal_pending(current))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200544 conn_warn(connection, "short read (expected size %d)\n", (int)size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100545 return err;
546}
547
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200548/* quoting tcp(7):
549 * On individual connections, the socket buffer size must be set prior to the
550 * listen(2) or connect(2) calls in order to have it take effect.
551 * This is our wrapper to do so.
552 */
553static void drbd_setbufsize(struct socket *sock, unsigned int snd,
554 unsigned int rcv)
555{
556 /* open coded SO_SNDBUF, SO_RCVBUF */
557 if (snd) {
558 sock->sk->sk_sndbuf = snd;
559 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
560 }
561 if (rcv) {
562 sock->sk->sk_rcvbuf = rcv;
563 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
564 }
565}
566
/**
 * drbd_try_connect() - Make one attempt to connect to the peer
 * @connection:	DRBD connection providing net_conf, my_addr and peer_addr.
 *
 * Creates a kernel TCP socket, sets its send/receive timeouts to
 * connect_int * HZ and its buffer sizes from net_conf, binds it to the
 * configured local address (with the port forced to 0) and connects it to
 * the configured peer address.
 *
 * Error policy:
 *  - a failing connect() never requests a state change;
 *  - a failing sock_create_kern() or bind() requests C_DISCONNECTING,
 *    unless the error is one of the "transient" ones listed in the switch;
 *  - errors not in that list are logged.
 *
 * Returns the connected socket, or NULL if there is no net_conf or any
 * step failed (the socket is released in that case).
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* copy what we need out of net_conf, so the RCU read side stays short */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	/* each copy is clamped to the size of its own destination buffer */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(peer_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(connection, "%s failed, err = %d\n", what, err);
			break;
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
654
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200655struct accept_wait_data {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200656 struct drbd_connection *connection;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200657 struct socket *s_listen;
658 struct completion door_bell;
659 void (*original_sk_state_change)(struct sock *sk);
660
661};
662
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200663static void drbd_incoming_connection(struct sock *sk)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700664{
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200665 struct accept_wait_data *ad = sk->sk_user_data;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200666 void (*state_change)(struct sock *sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200667
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200668 state_change = ad->original_sk_state_change;
669 if (sk->sk_state == TCP_ESTABLISHED)
670 complete(&ad->door_bell);
671 state_change(sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200672}
673
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200674static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700675{
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200676 int err, sndbuf_size, rcvbuf_size, my_addr_len;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200677 struct sockaddr_in6 my_addr;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200678 struct socket *s_listen;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200679 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700680 const char *what;
681
Philipp Reisner44ed1672011-04-19 17:10:19 +0200682 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200683 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200684 if (!nc) {
685 rcu_read_unlock();
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200686 return -EIO;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200687 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200688 sndbuf_size = nc->sndbuf_size;
689 rcvbuf_size = nc->rcvbuf_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200690 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700691
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200692 my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
693 memcpy(&my_addr, &connection->my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700694
695 what = "sock_create_kern";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200696 err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200697 SOCK_STREAM, IPPROTO_TCP, &s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700698 if (err) {
699 s_listen = NULL;
700 goto out;
701 }
702
Philipp Reisner98683652012-11-09 14:18:43 +0100703 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200704 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700705
706 what = "bind before listen";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200707 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700708 if (err < 0)
709 goto out;
710
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200711 ad->s_listen = s_listen;
712 write_lock_bh(&s_listen->sk->sk_callback_lock);
713 ad->original_sk_state_change = s_listen->sk->sk_state_change;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200714 s_listen->sk->sk_state_change = drbd_incoming_connection;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200715 s_listen->sk->sk_user_data = ad;
716 write_unlock_bh(&s_listen->sk->sk_callback_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700717
Philipp Reisner2820fd32012-07-12 10:22:48 +0200718 what = "listen";
719 err = s_listen->ops->listen(s_listen, 5);
720 if (err < 0)
721 goto out;
722
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200723 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700724out:
725 if (s_listen)
726 sock_release(s_listen);
727 if (err < 0) {
728 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200729 conn_err(connection, "%s failed, err = %d\n", what, err);
730 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700731 }
732 }
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200733
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200734 return -EIO;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200735}
736
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200737static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
738{
739 write_lock_bh(&sk->sk_callback_lock);
740 sk->sk_state_change = ad->original_sk_state_change;
741 sk->sk_user_data = NULL;
742 write_unlock_bh(&sk->sk_callback_lock);
743}
744
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200745static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200746{
747 int timeo, connect_int, err = 0;
748 struct socket *s_estab = NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200749 struct net_conf *nc;
750
751 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200752 nc = rcu_dereference(connection->net_conf);
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200753 if (!nc) {
754 rcu_read_unlock();
755 return NULL;
756 }
757 connect_int = nc->connect_int;
758 rcu_read_unlock();
759
760 timeo = connect_int * HZ;
Akinobu Mita38b682b22013-04-29 16:21:31 -0700761 /* 28.5% random jitter */
762 timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200763
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200764 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
765 if (err <= 0)
766 return NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200767
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200768 err = kernel_accept(ad->s_listen, &s_estab, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700769 if (err < 0) {
770 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200771 conn_err(connection, "accept failed, err = %d\n", err);
772 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700773 }
774 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700775
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200776 if (s_estab)
777 unregister_state_change(s_estab->sk, ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700778
779 return s_estab;
780}
781
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200782static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700783
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200784static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200785 enum drbd_packet cmd)
786{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200787 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200788 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200789 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700790}
791
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200792static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700793{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200794 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200795 struct packet_info pi;
796 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700797
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200798 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200799 if (err != header_size) {
800 if (err >= 0)
801 err = -EIO;
802 return err;
803 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200804 err = decode_header(connection, connection->data.rbuf, &pi);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200805 if (err)
806 return err;
807 return pi.cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700808}
809
810/**
811 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700812 * @sock: pointer to the pointer to the socket.
813 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100814static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700815{
816 int rr;
817 char tb[4];
818
819 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100820 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700821
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100822 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700823
824 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100825 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700826 } else {
827 sock_release(*sock);
828 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100829 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700830 }
831}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100832/* Gets called if a connection is established, or if a new minor gets created
833 in a connection */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200834int drbd_connected(struct drbd_device *device)
Philipp Reisner907599e2011-02-08 11:25:37 +0100835{
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100836 int err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100837
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200838 atomic_set(&device->packet_seq, 0);
839 device->peer_seq = 0;
Philipp Reisner907599e2011-02-08 11:25:37 +0100840
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200841 device->state_mutex = first_peer_device(device)->connection->agreed_pro_version < 100 ?
842 &first_peer_device(device)->connection->cstate_mutex :
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200843 &device->own_state_mutex;
Philipp Reisner8410da8f02011-02-11 20:11:10 +0100844
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200845 err = drbd_send_sync_param(device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100846 if (!err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200847 err = drbd_send_sizes(device, 0, 0);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100848 if (!err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200849 err = drbd_send_uuids(device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100850 if (!err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200851 err = drbd_send_current_state(device);
852 clear_bit(USE_DEGR_WFC_T, &device->flags);
853 clear_bit(RESIZE_PENDING, &device->flags);
854 atomic_set(&device->ap_in_flight, 0);
855 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100856 return err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100857}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700858
859/*
860 * return values:
861 * 1 yes, we have a valid connection
862 * 0 oops, did not work out, please try again
863 * -1 peer talks different language,
864 * no point in trying again, please go standalone.
865 * -2 We do not have a network config...
866 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200867static int conn_connect(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700868{
Philipp Reisner7da35862011-12-19 22:42:56 +0100869 struct drbd_socket sock, msock;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +0200870 struct drbd_peer_device *peer_device;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200871 struct net_conf *nc;
Philipp Reisner92f14952012-08-01 11:41:01 +0200872 int vnr, timeout, h, ok;
Philipp Reisner08b165b2011-09-05 16:22:33 +0200873 bool discard_my_data;
Philipp Reisner197296f2012-03-26 16:47:11 +0200874 enum drbd_state_rv rv;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200875 struct accept_wait_data ad = {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200876 .connection = connection,
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200877 .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
878 };
Philipp Reisnerb411b362009-09-25 16:07:19 -0700879
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200880 clear_bit(DISCONNECT_SENT, &connection->flags);
881 if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700882 return -2;
883
Philipp Reisner7da35862011-12-19 22:42:56 +0100884 mutex_init(&sock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200885 sock.sbuf = connection->data.sbuf;
886 sock.rbuf = connection->data.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100887 sock.socket = NULL;
888 mutex_init(&msock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200889 msock.sbuf = connection->meta.sbuf;
890 msock.rbuf = connection->meta.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100891 msock.socket = NULL;
892
Andreas Gruenbacher0916e0e2011-03-21 14:10:15 +0100893 /* Assume that the peer only understands protocol 80 until we know better. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200894 connection->agreed_pro_version = 80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700895
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200896 if (prepare_listen_socket(connection, &ad))
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200897 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700898
899 do {
Andreas Gruenbacher2bf89622011-03-28 16:33:12 +0200900 struct socket *s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700901
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200902 s = drbd_try_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700903 if (s) {
Philipp Reisner7da35862011-12-19 22:42:56 +0100904 if (!sock.socket) {
905 sock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200906 send_first_packet(connection, &sock, P_INITIAL_DATA);
Philipp Reisner7da35862011-12-19 22:42:56 +0100907 } else if (!msock.socket) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200908 clear_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100909 msock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200910 send_first_packet(connection, &msock, P_INITIAL_META);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700911 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200912 conn_err(connection, "Logic error in conn_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700913 goto out_release_sockets;
914 }
915 }
916
Philipp Reisner7da35862011-12-19 22:42:56 +0100917 if (sock.socket && msock.socket) {
918 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200919 nc = rcu_dereference(connection->net_conf);
Philipp Reisner7da35862011-12-19 22:42:56 +0100920 timeout = nc->ping_timeo * HZ / 10;
921 rcu_read_unlock();
922 schedule_timeout_interruptible(timeout);
923 ok = drbd_socket_okay(&sock.socket);
924 ok = drbd_socket_okay(&msock.socket) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700925 if (ok)
926 break;
927 }
928
929retry:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200930 s = drbd_wait_for_connect(connection, &ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700931 if (s) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200932 int fp = receive_first_packet(connection, s);
Philipp Reisner7da35862011-12-19 22:42:56 +0100933 drbd_socket_okay(&sock.socket);
934 drbd_socket_okay(&msock.socket);
Philipp Reisner92f14952012-08-01 11:41:01 +0200935 switch (fp) {
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200936 case P_INITIAL_DATA:
Philipp Reisner7da35862011-12-19 22:42:56 +0100937 if (sock.socket) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200938 conn_warn(connection, "initial packet S crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100939 sock_release(sock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200940 sock.socket = s;
941 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700942 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100943 sock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700944 break;
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200945 case P_INITIAL_META:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200946 set_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100947 if (msock.socket) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200948 conn_warn(connection, "initial packet M crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100949 sock_release(msock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200950 msock.socket = s;
951 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700952 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100953 msock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700954 break;
955 default:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200956 conn_warn(connection, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700957 sock_release(s);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200958randomize:
Akinobu Mita38b682b22013-04-29 16:21:31 -0700959 if (prandom_u32() & 1)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700960 goto retry;
961 }
962 }
963
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200964 if (connection->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700965 goto out_release_sockets;
966 if (signal_pending(current)) {
967 flush_signals(current);
968 smp_rmb();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200969 if (get_t_state(&connection->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700970 goto out_release_sockets;
971 }
972
Philipp Reisnerb666dbf2012-07-26 14:12:59 +0200973 ok = drbd_socket_okay(&sock.socket);
974 ok = drbd_socket_okay(&msock.socket) && ok;
975 } while (!ok);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700976
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200977 if (ad.s_listen)
978 sock_release(ad.s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700979
Philipp Reisner98683652012-11-09 14:18:43 +0100980 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
981 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700982
Philipp Reisner7da35862011-12-19 22:42:56 +0100983 sock.socket->sk->sk_allocation = GFP_NOIO;
984 msock.socket->sk->sk_allocation = GFP_NOIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700985
Philipp Reisner7da35862011-12-19 22:42:56 +0100986 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
987 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700988
Philipp Reisnerb411b362009-09-25 16:07:19 -0700989 /* NOT YET ...
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200990 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
Philipp Reisner7da35862011-12-19 22:42:56 +0100991 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Andreas Gruenbacher60381782011-03-28 17:05:50 +0200992 * first set it to the P_CONNECTION_FEATURES timeout,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700993 * which we set to 4x the configured ping_timeout. */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200994 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200995 nc = rcu_dereference(connection->net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700996
Philipp Reisner7da35862011-12-19 22:42:56 +0100997 sock.socket->sk->sk_sndtimeo =
998 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200999
Philipp Reisner7da35862011-12-19 22:42:56 +01001000 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001001 timeout = nc->timeout * HZ / 10;
Philipp Reisner08b165b2011-09-05 16:22:33 +02001002 discard_my_data = nc->discard_my_data;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001003 rcu_read_unlock();
1004
Philipp Reisner7da35862011-12-19 22:42:56 +01001005 msock.socket->sk->sk_sndtimeo = timeout;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001006
1007 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001008 * we use TCP_CORK where appropriate, though */
Philipp Reisner7da35862011-12-19 22:42:56 +01001009 drbd_tcp_nodelay(sock.socket);
1010 drbd_tcp_nodelay(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001011
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001012 connection->data.socket = sock.socket;
1013 connection->meta.socket = msock.socket;
1014 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001015
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001016 h = drbd_do_features(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001017 if (h <= 0)
1018 return h;
1019
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001020 if (connection->cram_hmac_tfm) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001021 /* drbd_request_state(device, NS(conn, WFAuth)); */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001022 switch (drbd_do_auth(connection)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +01001023 case -1:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001024 conn_err(connection, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001025 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +01001026 case 0:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001027 conn_err(connection, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01001028 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001029 }
1030 }
1031
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001032 connection->data.socket->sk->sk_sndtimeo = timeout;
1033 connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001034
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001035 if (drbd_send_protocol(connection) == -EOPNOTSUPP)
Philipp Reisner7e2455c2010-04-22 14:50:23 +02001036 return -1;
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001037
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001038 set_bit(STATE_SENT, &connection->flags);
Philipp Reisner197296f2012-03-26 16:47:11 +02001039
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001040 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001041 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1042 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001043 kref_get(&device->kref);
Andreas Gruenbacher26ea8f92013-06-25 16:50:03 +02001044 rcu_read_unlock();
1045
Philipp Reisner13c76ab2012-11-22 17:06:00 +01001046 /* Prevent a race between resync-handshake and
1047 * being promoted to Primary.
1048 *
1049 * Grab and release the state mutex, so we know that any current
1050 * drbd_set_role() is finished, and any incoming drbd_set_role
1051 * will see the STATE_SENT flag, and wait for it to be cleared.
1052 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001053 mutex_lock(device->state_mutex);
1054 mutex_unlock(device->state_mutex);
Philipp Reisner13c76ab2012-11-22 17:06:00 +01001055
Philipp Reisner08b165b2011-09-05 16:22:33 +02001056 if (discard_my_data)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001057 set_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001058 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001059 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001060
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001061 drbd_connected(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001062 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001063 rcu_read_lock();
1064 }
1065 rcu_read_unlock();
1066
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001067 rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1068 if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1069 clear_bit(STATE_SENT, &connection->flags);
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001070 return 0;
Philipp Reisnera1096a62012-04-06 12:07:34 +02001071 }
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001072
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001073 drbd_thread_start(&connection->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001074
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001075 mutex_lock(&connection->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001076 /* The discard_my_data flag is a single-shot modifier to the next
1077 * connection attempt, the handshake of which is now well underway.
1078 * No need for rcu style copying of the whole struct
1079 * just to clear a single value. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001080 connection->net_conf->discard_my_data = 0;
1081 mutex_unlock(&connection->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001082
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07001083 return h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001084
1085out_release_sockets:
Philipp Reisner7a426fd2012-07-12 14:22:37 +02001086 if (ad.s_listen)
1087 sock_release(ad.s_listen);
Philipp Reisner7da35862011-12-19 22:42:56 +01001088 if (sock.socket)
1089 sock_release(sock.socket);
1090 if (msock.socket)
1091 sock_release(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001092 return -1;
1093}
1094
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001095static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001096{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001097 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001098
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001099 if (header_size == sizeof(struct p_header100) &&
1100 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1101 struct p_header100 *h = header;
1102 if (h->pad != 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001103 conn_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001104 return -EINVAL;
1105 }
1106 pi->vnr = be16_to_cpu(h->volume);
1107 pi->cmd = be16_to_cpu(h->command);
1108 pi->size = be32_to_cpu(h->length);
1109 } else if (header_size == sizeof(struct p_header95) &&
1110 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001111 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001112 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001113 pi->size = be32_to_cpu(h->length);
1114 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001115 } else if (header_size == sizeof(struct p_header80) &&
1116 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1117 struct p_header80 *h = header;
1118 pi->cmd = be16_to_cpu(h->command);
1119 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001120 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001121 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001122 conn_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001123 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001124 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001125 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001126 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001127 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001128 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001129}
1130
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001131static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001132{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001133 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001134 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001135
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001136 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001137 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001138 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001139
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001140 err = decode_header(connection, buffer, pi);
1141 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001142
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001143 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144}
1145
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001146static void drbd_flush(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001147{
1148 int rv;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001149 struct drbd_peer_device *peer_device;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001150 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001151
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001152 if (connection->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001153 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001154 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1155 struct drbd_device *device = peer_device->device;
1156
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001157 if (!get_ldev(device))
Lars Ellenberg615e0872011-11-17 14:32:12 +01001158 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001159 kref_get(&device->kref);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001160 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001161
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001162 rv = blkdev_issue_flush(device->ldev->backing_bdev,
Lars Ellenberg615e0872011-11-17 14:32:12 +01001163 GFP_NOIO, NULL);
1164 if (rv) {
1165 dev_info(DEV, "local disk flush failed with status %d\n", rv);
1166 /* would rather check on EOPNOTSUPP, but that is not reliable.
1167 * don't try again for ANY return value != 0
1168 * if (rv == -EOPNOTSUPP) */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001169 drbd_bump_write_ordering(connection, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001170 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001171 put_ldev(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001172 kref_put(&device->kref, drbd_destroy_device);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001173
1174 rcu_read_lock();
1175 if (rv)
1176 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001177 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001178 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001180}
1181
1182/**
1183 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001184 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001185 * @epoch: Epoch object.
1186 * @ev: Epoch event.
1187 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001188static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001189 struct drbd_epoch *epoch,
1190 enum epoch_event ev)
1191{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001192 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001193 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001194 enum finish_epoch rv = FE_STILL_LIVE;
1195
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001196 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197 do {
1198 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199
1200 epoch_size = atomic_read(&epoch->epoch_size);
1201
1202 switch (ev & ~EV_CLEANUP) {
1203 case EV_PUT:
1204 atomic_dec(&epoch->active);
1205 break;
1206 case EV_GOT_BARRIER_NR:
1207 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 break;
1209 case EV_BECAME_LAST:
1210 /* nothing to do*/
1211 break;
1212 }
1213
Philipp Reisnerb411b362009-09-25 16:07:19 -07001214 if (epoch_size != 0 &&
1215 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001216 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001217 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001218 spin_unlock(&connection->epoch_lock);
1219 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1220 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001221 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001222#if 0
1223 /* FIXME: dec unacked on connection, once we have
1224 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001225 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001226 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001227#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001229 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1231 list_del(&epoch->list);
1232 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001233 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001234 kfree(epoch);
1235
1236 if (rv == FE_STILL_LIVE)
1237 rv = FE_DESTROYED;
1238 } else {
1239 epoch->flags = 0;
1240 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001241 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001242 if (rv == FE_STILL_LIVE)
1243 rv = FE_RECYCLED;
1244 }
1245 }
1246
1247 if (!next_epoch)
1248 break;
1249
1250 epoch = next_epoch;
1251 } while (1);
1252
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001253 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001254
Philipp Reisnerb411b362009-09-25 16:07:19 -07001255 return rv;
1256}
1257
1258/**
1259 * drbd_bump_write_ordering() - Fall back to an other write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001260 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001261 * @wo: Write ordering method to try.
1262 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001263void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001264{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001265 struct disk_conf *dc;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001266 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001267 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001268 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001269 static char *write_ordering_str[] = {
1270 [WO_none] = "none",
1271 [WO_drain_io] = "drain",
1272 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001273 };
1274
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001275 pwo = connection->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001276 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001277 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001278 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1279 struct drbd_device *device = peer_device->device;
1280
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001281 if (!get_ldev_if_state(device, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001282 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001283 dc = rcu_dereference(device->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001284
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001285 if (wo == WO_bdev_flush && !dc->disk_flushes)
1286 wo = WO_drain_io;
1287 if (wo == WO_drain_io && !dc->disk_drain)
1288 wo = WO_none;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001289 put_ldev(device);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001290 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001291 rcu_read_unlock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001292 connection->write_ordering = wo;
1293 if (pwo != connection->write_ordering || wo == WO_bdev_flush)
1294 conn_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295}
1296
1297/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001298 * drbd_submit_peer_request()
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001299 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001300 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001301 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001302 *
1303 * May spread the pages to multiple bios,
1304 * depending on bio_add_page restrictions.
1305 *
1306 * Returns 0 if all bios have been submitted,
1307 * -ENOMEM if we could not allocate enough bios,
1308 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1309 * single page to an empty bio (which should never happen and likely indicates
1310 * that the lower level IO stack is in some way broken). This has been observed
1311 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001312 */
1313/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001314int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001315 struct drbd_peer_request *peer_req,
1316 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001317{
1318 struct bio *bios = NULL;
1319 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001320 struct page *page = peer_req->pages;
1321 sector_t sector = peer_req->i.sector;
1322 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001323 unsigned n_bios = 0;
1324 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001325 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001326
1327 /* In most cases, we will only need one bio. But in case the lower
1328 * level restrictions happen to be different at this offset on this
1329 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001330 * request in more than one bio.
1331 *
1332 * Plain bio_alloc is good enough here, this is no DRBD internally
1333 * generated bio, but a bio allocated on behalf of the peer.
1334 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001335next_bio:
1336 bio = bio_alloc(GFP_NOIO, nr_pages);
1337 if (!bio) {
1338 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1339 goto fail;
1340 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001341 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001342 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001343 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001344 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001345 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001346 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001347
1348 bio->bi_next = bios;
1349 bios = bio;
1350 ++n_bios;
1351
1352 page_chain_for_each(page) {
1353 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1354 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001355 /* A single page must always be possible!
1356 * But in case it fails anyways,
1357 * we deal with it, and complain (below). */
1358 if (bio->bi_vcnt == 0) {
1359 dev_err(DEV,
1360 "bio_add_page failed for len=%u, "
1361 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001362 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001363 err = -ENOSPC;
1364 goto fail;
1365 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001366 goto next_bio;
1367 }
1368 ds -= len;
1369 sector += len >> 9;
1370 --nr_pages;
1371 }
1372 D_ASSERT(page == NULL);
1373 D_ASSERT(ds == 0);
1374
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001375 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001376 do {
1377 bio = bios;
1378 bios = bios->bi_next;
1379 bio->bi_next = NULL;
1380
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001381 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001382 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001383 return 0;
1384
1385fail:
1386 while (bios) {
1387 bio = bios;
1388 bios = bios->bi_next;
1389 bio_put(bio);
1390 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001391 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001392}
1393
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001394static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001395 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001396{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001397 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001398
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001399 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001400 drbd_clear_interval(i);
1401
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001402 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001403 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001404 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001405}
1406
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001407static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001408{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001409 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001410 int vnr;
1411
1412 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001413 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1414 struct drbd_device *device = peer_device->device;
1415
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001416 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001417 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001418 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001419 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001420 rcu_read_lock();
1421 }
1422 rcu_read_unlock();
1423}
1424
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001425static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001426{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001427 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001428 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001429 struct drbd_epoch *epoch;
1430
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001431 /* FIXME these are unacked on connection,
1432 * not a specific (peer)device.
1433 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001434 connection->current_epoch->barrier_nr = p->barrier;
1435 connection->current_epoch->connection = connection;
1436 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437
1438 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1439 * the activity log, which means it would not be resynced in case the
1440 * R_PRIMARY crashes now.
1441 * Therefore we must send the barrier_ack after the barrier request was
1442 * completed. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001443 switch (connection->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001444 case WO_none:
1445 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001446 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001447
1448 /* receiver context, in the writeout path of the other node.
1449 * avoid potential distributed deadlock */
1450 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1451 if (epoch)
1452 break;
1453 else
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001454 conn_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001455 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001456
1457 case WO_bdev_flush:
1458 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001459 conn_wait_active_ee_empty(connection);
1460 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001461
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001462 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001463 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1464 if (epoch)
1465 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466 }
1467
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001468 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001469 default:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001470 conn_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001471 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001472 }
1473
1474 epoch->flags = 0;
1475 atomic_set(&epoch->epoch_size, 0);
1476 atomic_set(&epoch->active, 0);
1477
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001478 spin_lock(&connection->epoch_lock);
1479 if (atomic_read(&connection->current_epoch->epoch_size)) {
1480 list_add(&epoch->list, &connection->current_epoch->list);
1481 connection->current_epoch = epoch;
1482 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483 } else {
1484 /* The current_epoch got recycled while we allocated this one... */
1485 kfree(epoch);
1486 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001487 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001488
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001489 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490}
1491
1492/* used from receive_RSDataReply (recv_resync_read)
1493 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001494static struct drbd_peer_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001495read_in_block(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001496 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001498 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001499 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001500 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001501 int dgs, ds, err;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001502 void *dig_in = first_peer_device(device)->connection->int_dig_in;
1503 void *dig_vv = first_peer_device(device)->connection->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001504 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001505
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001506 dgs = 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001507 if (first_peer_device(device)->connection->peer_integrity_tfm) {
1508 dgs = crypto_hash_digestsize(first_peer_device(device)->connection->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001509 /*
1510 * FIXME: Receive the incoming digest into the receive buffer
1511 * here, together with its struct p_data?
1512 */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001513 err = drbd_recv_all_warn(first_peer_device(device)->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001514 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001515 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001516 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001517 }
1518
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001519 if (!expect(IS_ALIGNED(data_size, 512)))
1520 return NULL;
1521 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1522 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001523
Lars Ellenberg66660322010-04-06 12:15:04 +02001524 /* even though we trust out peer,
1525 * we sometimes have to double check. */
1526 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001527 dev_err(DEV, "request from peer beyond end of local disk: "
1528 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001529 (unsigned long long)capacity,
1530 (unsigned long long)sector, data_size);
1531 return NULL;
1532 }
1533
Philipp Reisnerb411b362009-09-25 16:07:19 -07001534 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1535 * "criss-cross" setup, that might cause write-out on some other DRBD,
1536 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001537 peer_req = drbd_alloc_peer_req(device, id, sector, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001538 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001539 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001540
Lars Ellenberga73ff322012-06-25 19:15:38 +02001541 if (!data_size)
Lars Ellenberg81a35372012-07-30 09:00:54 +02001542 return peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001543
Philipp Reisnerb411b362009-09-25 16:07:19 -07001544 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001545 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001546 page_chain_for_each(page) {
1547 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001548 data = kmap(page);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001549 err = drbd_recv_all_warn(first_peer_device(device)->connection, data, len);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001550 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001551 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1552 data[0] = data[0] ^ (unsigned long)-1;
1553 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001554 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001555 if (err) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001556 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557 return NULL;
1558 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001559 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001560 }
1561
1562 if (dgs) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001563 drbd_csum_ee(device, first_peer_device(device)->connection->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001565 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1566 (unsigned long long)sector, data_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001567 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568 return NULL;
1569 }
1570 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001571 device->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001572 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001573}
1574
1575/* drbd_drain_block() just takes a data block
1576 * out of the socket input buffer, and discards it.
1577 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001578static int drbd_drain_block(struct drbd_device *device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001579{
1580 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001581 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001582 void *data;
1583
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001584 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001585 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001586
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001587 page = drbd_alloc_pages(device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001588
1589 data = kmap(page);
1590 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001591 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1592
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001593 err = drbd_recv_all_warn(first_peer_device(device)->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001594 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001595 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001596 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001597 }
1598 kunmap(page);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001599 drbd_free_pages(device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001600 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001601}
1602
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001603static int recv_dless_read(struct drbd_device *device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001604 sector_t sector, int data_size)
1605{
Kent Overstreet79886132013-11-23 17:19:00 -08001606 struct bio_vec bvec;
1607 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001608 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001609 int dgs, err, expect;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001610 void *dig_in = first_peer_device(device)->connection->int_dig_in;
1611 void *dig_vv = first_peer_device(device)->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001612
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001613 dgs = 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001614 if (first_peer_device(device)->connection->peer_integrity_tfm) {
1615 dgs = crypto_hash_digestsize(first_peer_device(device)->connection->peer_integrity_tfm);
1616 err = drbd_recv_all_warn(first_peer_device(device)->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001617 if (err)
1618 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001619 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001620 }
1621
Philipp Reisnerb411b362009-09-25 16:07:19 -07001622 /* optimistically update recv_cnt. if receiving fails below,
1623 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001624 device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001625
1626 bio = req->master_bio;
Kent Overstreet4f024f32013-10-11 15:44:27 -07001627 D_ASSERT(sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628
Kent Overstreet79886132013-11-23 17:19:00 -08001629 bio_for_each_segment(bvec, bio, iter) {
1630 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1631 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001632 err = drbd_recv_all_warn(first_peer_device(device)->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001633 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001634 if (err)
1635 return err;
1636 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001637 }
1638
1639 if (dgs) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001640 drbd_csum_bio(device, first_peer_device(device)->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001641 if (memcmp(dig_in, dig_vv, dgs)) {
1642 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001643 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001644 }
1645 }
1646
1647 D_ASSERT(data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001648 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649}
1650
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001651/*
1652 * e_end_resync_block() is called in asender context via
1653 * drbd_finish_peer_reqs().
1654 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001655static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001656{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001657 struct drbd_peer_request *peer_req =
1658 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001659 struct drbd_device *device = w->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001660 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001661 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001663 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001665 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001666 drbd_set_in_sync(device, sector, peer_req->i.size);
1667 err = drbd_send_ack(device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001668 } else {
1669 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001670 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001671
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001672 err = drbd_send_ack(device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001673 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001674 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001676 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001677}
1678
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001679static int recv_resync_read(struct drbd_device *device, sector_t sector, int data_size) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001680{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001681 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001682
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001683 peer_req = read_in_block(device, ID_SYNCER, sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001684 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001685 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001686
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001687 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001688
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001689 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690 /* corresponding dec_unacked() in e_end_resync_block()
1691 * respective _drbd_clear_done_ee */
1692
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001693 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001694
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001695 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001696 list_add(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001697 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001698
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001699 atomic_add(data_size >> 9, &device->rs_sect_ev);
1700 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001701 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001702
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001703 /* don't care for the reason here */
1704 dev_err(DEV, "submit failed, triggering re-connect\n");
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001705 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001706 list_del(&peer_req->w.list);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001707 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001708
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001709 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001710fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001711 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001712 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001713}
1714
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001715static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001716find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001717 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001718{
1719 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001720
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001721 /* Request object according to our peer */
1722 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001723 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001724 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001725 if (!missing_ok) {
Andreas Gruenbacher5af172e2011-07-15 09:43:23 +02001726 dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001727 (unsigned long)id, (unsigned long long)sector);
1728 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001729 return NULL;
1730}
1731
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001732static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001733{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001734 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001735 struct drbd_request *req;
1736 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001737 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001738 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001739
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001740 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001741 if (!device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001742 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001743
1744 sector = be64_to_cpu(p->sector);
1745
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001746 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001747 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001748 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001749 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001750 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001751
Bart Van Assche24c48302011-05-21 18:32:29 +02001752 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001753 * special casing it there for the various failure cases.
1754 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001755 err = recv_dless_read(device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001756 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001757 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758 /* else: nothing. handled from drbd_disconnect...
1759 * I don't think we may complete this just yet
1760 * in case we are "on-disconnect: freeze" */
1761
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001762 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001763}
1764
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001765static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001766{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001767 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001768 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001769 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001770 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001771
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001772 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001773 if (!device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001774 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001775
1776 sector = be64_to_cpu(p->sector);
1777 D_ASSERT(p->block_id == ID_SYNCER);
1778
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001779 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001780 /* data is submitted to disk within recv_resync_read.
1781 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001782 * or in drbd_peer_request_endio. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001783 err = recv_resync_read(device, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784 } else {
1785 if (__ratelimit(&drbd_ratelimit_state))
1786 dev_err(DEV, "Can not write resync data to local disk.\n");
1787
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001788 err = drbd_drain_block(device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001789
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001790 drbd_send_ack_dp(device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001791 }
1792
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001793 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001794
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001795 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001796}
1797
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001798static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001799 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001801 struct drbd_interval *i;
1802 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001803
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001804 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001805 if (!i->local)
1806 continue;
1807 req = container_of(i, struct drbd_request, i);
1808 if (req->rq_state & RQ_LOCAL_PENDING ||
1809 !(req->rq_state & RQ_POSTPONED))
1810 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001811 /* as it is RQ_POSTPONED, this will cause it to
1812 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001813 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001814 }
1815}
1816
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001817/*
1818 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001819 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001820static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001821{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001822 struct drbd_peer_request *peer_req =
1823 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001824 struct drbd_device *device = w->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001825 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001826 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001827
Philipp Reisner303d1442011-04-13 16:24:47 -07001828 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001829 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001830 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1831 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001832 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001833 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001834 err = drbd_send_ack(device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001835 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001836 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001837 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001838 err = drbd_send_ack(device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001839 /* we expect it to be marked out of sync anyways...
1840 * maybe assert this? */
1841 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001842 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001843 }
1844 /* we delete from the conflict detection hash _after_ we sent out the
1845 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001846 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001847 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001848 D_ASSERT(!drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001849 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001850 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001851 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001852 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001853 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001854 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001856 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001857
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001858 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001859}
1860
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001861static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001862{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001863 struct drbd_device *device = w->device;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001864 struct drbd_peer_request *peer_req =
1865 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001866 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001867
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001868 err = drbd_send_ack(device, ack, peer_req);
1869 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001870
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001871 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001872}
1873
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001874static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001875{
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001876 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001877}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001878
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001879static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001880{
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001881 struct drbd_connection *connection = first_peer_device(w->device)->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001882
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001883 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001884 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001885}
1886
/*
 * seq_greater() - wrap-around aware "a is newer than b" for sequence numbers
 *
 * Returns true if @a is ahead of @b by 1 .. 2^31 - 1 steps (modulo 2^32).
 */
static bool seq_greater(u32 a, u32 b)
{
	/*
	 * We assume 32-bit wrap-around here.
	 * For 24-bit wrap-around, we would have to shift:
	 *  a <<= 8; b <<= 8;
	 *
	 * Subtract in unsigned arithmetic (well defined modulo 2^32) and only
	 * then reinterpret the distance as signed.  Subtracting two values
	 * already cast to s32 can overflow, which ISO C leaves undefined.
	 */
	return (s32)(a - b) > 0;
}
1896
1897static u32 seq_max(u32 a, u32 b)
1898{
1899 return seq_greater(a, b) ? a : b;
1900}
1901
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001902static void update_peer_seq(struct drbd_device *device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001903{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001904 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001905
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001906 if (test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001907 spin_lock(&device->peer_seq_lock);
1908 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1909 device->peer_seq = newest_peer_seq;
1910 spin_unlock(&device->peer_seq_lock);
1911 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001912 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001913 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001914 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001915}
1916
/*
 * overlaps() - do two extents share at least one sector?
 * @s1, @s2: start sectors; @l1, @l2: lengths in bytes (shifted down by 9
 * to get 512 byte sectors).
 *
 * Returns 1 if the extents intersect, 0 otherwise.
 */
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	/* extent 1 ends after extent 2 starts, and starts before it ends */
	return s1 + (l1 >> 9) > s2 && s1 < s2 + (l2 >> 9);
}
1921
1922/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001923static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001924{
1925 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001926 bool rv = 0;
1927
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001928 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001929 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001930 if (overlaps(peer_req->i.sector, peer_req->i.size,
1931 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001932 rv = 1;
1933 break;
1934 }
1935 }
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001936 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001937
1938 return rv;
1939}
1940
Philipp Reisnerb411b362009-09-25 16:07:19 -07001941/* Called from receive_Data.
1942 * Synchronize packets on sock with packets on msock.
1943 *
1944 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1945 * packet traveling on msock, they are still processed in the order they have
1946 * been sent.
1947 *
1948 * Note: we don't care for Ack packets overtaking P_DATA packets.
1949 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001950 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07001951 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001952 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07001953 * ourselves. Correctly handles 32bit wrap around.
1954 *
1955 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1956 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1957 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1958 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1959 *
1960 * returns 0 if we may process the packet,
1961 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001962static int wait_for_and_update_peer_seq(struct drbd_device *device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001963{
1964 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001965 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02001966 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001967
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001968 if (!test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001969 return 0;
1970
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001971 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001973 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
1974 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001975 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001976 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001977
Philipp Reisnerb411b362009-09-25 16:07:19 -07001978 if (signal_pending(current)) {
1979 ret = -ERESTARTSYS;
1980 break;
1981 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001982
1983 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001984 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02001985 rcu_read_unlock();
1986
1987 if (!tp)
1988 break;
1989
1990 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001991 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
1992 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001993 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001994 timeout = rcu_dereference(first_peer_device(device)->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001995 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001996 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001997 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001998 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001999 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002000 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002001 break;
2002 }
2003 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002004 spin_unlock(&device->peer_seq_lock);
2005 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002006 return ret;
2007}
2008
Lars Ellenberg688593c2010-11-17 22:25:03 +01002009/* see also bio_flags_to_wire()
2010 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2011 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002012static unsigned long wire_flags_to_bio(struct drbd_device *device, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002013{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002014 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2015 (dpf & DP_FUA ? REQ_FUA : 0) |
2016 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2017 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002018}
2019
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002020static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002021 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002022{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002023 struct drbd_interval *i;
2024
2025 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002026 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002027 struct drbd_request *req;
2028 struct bio_and_error m;
2029
2030 if (!i->local)
2031 continue;
2032 req = container_of(i, struct drbd_request, i);
2033 if (!(req->rq_state & RQ_POSTPONED))
2034 continue;
2035 req->rq_state &= ~RQ_POSTPONED;
2036 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002037 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002038 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002039 complete_master_bio(device, &m);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002040 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002041 goto repeat;
2042 }
2043}
2044
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002045static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002046 struct drbd_peer_request *peer_req)
2047{
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002048 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002049 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002050 sector_t sector = peer_req->i.sector;
2051 const unsigned int size = peer_req->i.size;
2052 struct drbd_interval *i;
2053 bool equal;
2054 int err;
2055
2056 /*
2057 * Inserting the peer request into the write_requests tree will prevent
2058 * new conflicting local requests from being added.
2059 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002060 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002061
2062 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002063 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002064 if (i == &peer_req->i)
2065 continue;
2066
2067 if (!i->local) {
2068 /*
2069 * Our peer has sent a conflicting remote request; this
2070 * should not happen in a two-node setup. Wait for the
2071 * earlier peer request to complete.
2072 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002073 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002074 if (err)
2075 goto out;
2076 goto repeat;
2077 }
2078
2079 equal = i->sector == sector && i->size == size;
2080 if (resolve_conflicts) {
2081 /*
2082 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002083 * overlapping request, it can be considered overwritten
2084 * and thus superseded; otherwise, it will be retried
2085 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002086 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002087 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002088 (i->size >> 9) >= sector + (size >> 9);
2089
2090 if (!equal)
2091 dev_alert(DEV, "Concurrent writes detected: "
2092 "local=%llus +%u, remote=%llus +%u, "
2093 "assuming %s came first\n",
2094 (unsigned long long)i->sector, i->size,
2095 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002096 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002097
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002098 inc_unacked(device);
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002099 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002100 e_send_retry_write;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002101 list_add_tail(&peer_req->w.list, &device->done_ee);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002102 wake_asender(first_peer_device(device)->connection);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002103
2104 err = -ENOENT;
2105 goto out;
2106 } else {
2107 struct drbd_request *req =
2108 container_of(i, struct drbd_request, i);
2109
2110 if (!equal)
2111 dev_alert(DEV, "Concurrent writes detected: "
2112 "local=%llus +%u, remote=%llus +%u\n",
2113 (unsigned long long)i->sector, i->size,
2114 (unsigned long long)sector, size);
2115
2116 if (req->rq_state & RQ_LOCAL_PENDING ||
2117 !(req->rq_state & RQ_POSTPONED)) {
2118 /*
2119 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002120 * decide if this request has been superseded
2121 * or needs to be retried.
2122 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002123 * disappear from the write_requests tree.
2124 *
2125 * In addition, wait for the conflicting
2126 * request to finish locally before submitting
2127 * the conflicting peer request.
2128 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002129 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002130 if (err) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002131 _conn_request_state(first_peer_device(device)->connection,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002132 NS(conn, C_TIMEOUT),
2133 CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002134 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002135 goto out;
2136 }
2137 goto repeat;
2138 }
2139 /*
2140 * Remember to restart the conflicting requests after
2141 * the new peer request has completed.
2142 */
2143 peer_req->flags |= EE_RESTART_REQUESTS;
2144 }
2145 }
2146 err = 0;
2147
2148 out:
2149 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002150 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002151 return err;
2152}
2153
Philipp Reisnerb411b362009-09-25 16:07:19 -07002154/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002155static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002156{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002157 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002158 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002159 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002160 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002161 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002162 int rw = WRITE;
2163 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002164 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002165
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002166 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002167 if (!device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002168 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002169
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002170 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002171 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002172
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002173 err = wait_for_and_update_peer_seq(device, peer_seq);
2174 drbd_send_ack_dp(device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002175 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002176 err2 = drbd_drain_block(device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002177 if (!err)
2178 err = err2;
2179 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002180 }
2181
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002182 /*
2183 * Corresponding put_ldev done either below (on various errors), or in
2184 * drbd_peer_request_endio, if we successfully submit the data at the
2185 * end of this function.
2186 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002187
2188 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002189 peer_req = read_in_block(device, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002190 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002191 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002192 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002193 }
2194
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002195 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002196
Lars Ellenberg688593c2010-11-17 22:25:03 +01002197 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002198 rw |= wire_flags_to_bio(device, dp_flags);
Lars Ellenberg81a35372012-07-30 09:00:54 +02002199 if (peer_req->pages == NULL) {
2200 D_ASSERT(peer_req->i.size == 0);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002201 D_ASSERT(dp_flags & DP_FLUSH);
2202 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002203
2204 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002205 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002206
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002207 spin_lock(&connection->epoch_lock);
2208 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002209 atomic_inc(&peer_req->epoch->epoch_size);
2210 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002211 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002212
Philipp Reisner302bdea2011-04-21 11:36:49 +02002213 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002214 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002215 rcu_read_unlock();
2216 if (tp) {
2217 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002218 err = wait_for_and_update_peer_seq(device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002219 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002220 goto out_interrupted;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002221 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002222 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002223 if (err) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002224 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002225 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002226 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002227 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002228 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002229 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002230 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002231 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002232 update_peer_seq(device, peer_seq);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002233 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002234 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002235 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002236 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002237
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002238 if (device->state.conn == C_SYNC_TARGET)
2239 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002240
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002241 if (first_peer_device(device)->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002242 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002243 switch (rcu_dereference(first_peer_device(device)->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002244 case DRBD_PROT_C:
2245 dp_flags |= DP_SEND_WRITE_ACK;
2246 break;
2247 case DRBD_PROT_B:
2248 dp_flags |= DP_SEND_RECEIVE_ACK;
2249 break;
2250 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002251 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002252 }
2253
2254 if (dp_flags & DP_SEND_WRITE_ACK) {
2255 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002256 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002257 /* corresponding dec_unacked() in e_end_block()
2258 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002259 }
2260
2261 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002262 /* I really don't like it that the receiver thread
2263 * sends on the msock, but anyways */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002264 drbd_send_ack(device, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002265 }
2266
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002267 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002268 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002269 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002270 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2271 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002272 drbd_al_begin_io(device, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002273 }
2274
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002275 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002276 if (!err)
2277 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002278
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002279 /* don't care for the reason here */
2280 dev_err(DEV, "submit failed, triggering re-connect\n");
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002281 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002282 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002283 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002284 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002285 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002286 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002287
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002289 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002290 put_ldev(device);
2291 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002292 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002293}
2294
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002295/* We may throttle resync, if the lower device seems to be busy,
2296 * and current sync rate is above c_min_rate.
2297 *
2298 * To decide whether or not the lower device is busy, we use a scheme similar
2299 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2300 * (more than 64 sectors) of activity we cannot account for with our own resync
2301 * activity, it obviously is "busy".
2302 *
2303 * The current sync rate used here uses only the most recent two step marks,
2304 * to have a short time average so we can react faster.
2305 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002306int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002307{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002308 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002309 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002310 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002311 int curr_events;
2312 int throttle = 0;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002313 unsigned int c_min_rate;
2314
2315 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002316 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002317 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002318
2319 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002320 if (c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002321 return 0;
2322
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002323 spin_lock_irq(&device->al_lock);
2324 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
Philipp Reisnere3555d82010-11-07 15:56:29 +01002325 if (tmp) {
2326 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2327 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002328 spin_unlock_irq(&device->al_lock);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002329 return 0;
2330 }
2331 /* Do not slow down if app IO is already waiting for this extent */
2332 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002333 spin_unlock_irq(&device->al_lock);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002334
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002335 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2336 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002337 atomic_read(&device->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002338
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002339 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002340 unsigned long rs_left;
2341 int i;
2342
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002343 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002344
2345 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2346 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002347 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002348
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002349 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2350 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002351 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002352 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002353
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002354 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002355 if (!dt)
2356 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002357 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002358 dbdt = Bit2KB(db/dt);
2359
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002360 if (dbdt > c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002361 throttle = 1;
2362 }
2363 return throttle;
2364}
2365
2366
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002367static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002368{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002369 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002370 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002371 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002372 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002373 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002374 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002375 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002376 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002377
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002378 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002379 if (!device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002380 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002381 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002382
2383 sector = be64_to_cpu(p->sector);
2384 size = be32_to_cpu(p->blksize);
2385
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002386 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002387 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2388 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002389 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002390 }
2391 if (sector + (size>>9) > capacity) {
2392 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2393 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002394 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002395 }
2396
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002397 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002398 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002399 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002400 case P_DATA_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002401 drbd_send_ack_rp(device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002402 break;
2403 case P_RS_DATA_REQUEST:
2404 case P_CSUM_RS_REQUEST:
2405 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002406 drbd_send_ack_rp(device, P_NEG_RS_DREPLY , p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002407 break;
2408 case P_OV_REPLY:
2409 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002410 dec_rs_pending(device);
2411 drbd_send_ack_ex(device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002412 break;
2413 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002414 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002415 }
2416 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002417 dev_err(DEV, "Can not satisfy peer's read request, "
2418 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002419
Lars Ellenberga821cc42010-09-06 12:31:37 +02002420 /* drain possibly payload */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002421 return drbd_drain_block(device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002422 }
2423
2424 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2425 * "criss-cross" setup, that might cause write-out on some other DRBD,
2426 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002427 peer_req = drbd_alloc_peer_req(device, p->block_id, sector, size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002428 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002429 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002430 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002431 }
2432
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002433 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002434 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002435 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002436 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002437 /* application IO, don't drbd_rs_begin_io */
2438 goto submit;
2439
Philipp Reisnerb411b362009-09-25 16:07:19 -07002440 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002441 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002442 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002443 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002444 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002445 break;
2446
2447 case P_OV_REPLY:
2448 case P_CSUM_RS_REQUEST:
2449 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002450 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002451 if (!di)
2452 goto out_free_e;
2453
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002454 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002455 di->digest = (((char *)di)+sizeof(struct digest_info));
2456
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002457 peer_req->digest = di;
2458 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002459
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002460 if (drbd_recv_all(first_peer_device(device)->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002461 goto out_free_e;
2462
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002463 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002464 D_ASSERT(first_peer_device(device)->connection->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002465 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002466 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002467 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002468 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002469 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002470 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002471 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002472 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002473 /* drbd_rs_begin_io done when we sent this request,
2474 * but accounting still needs to be done. */
2475 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002476 }
2477 break;
2478
2479 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002480 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002481 first_peer_device(device)->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002482 unsigned long now = jiffies;
2483 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002484 device->ov_start_sector = sector;
2485 device->ov_position = sector;
2486 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2487 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002488 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002489 device->rs_mark_left[i] = device->ov_left;
2490 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002491 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002492 dev_info(DEV, "Online Verify start sector: %llu\n",
2493 (unsigned long long)sector);
2494 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002495 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002496 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002497 break;
2498
Philipp Reisnerb411b362009-09-25 16:07:19 -07002499 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002500 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002501 }
2502
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002503 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2504 * wrt the receiver, but it is not as straightforward as it may seem.
2505 * Various places in the resync start and stop logic assume resync
2506 * requests are processed in order, requeuing this on the worker thread
2507 * introduces a bunch of new code for synchronization between threads.
2508 *
2509 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2510 * "forever", throttling after drbd_rs_begin_io will lock that extent
2511 * for application writes for the same time. For now, just throttle
2512 * here, where the rest of the code expects the receiver to sleep for
2513 * a while, anyways.
2514 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002515
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002516 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2517 * this defers syncer requests for some time, before letting at least
2518 * on request through. The resync controller on the receiving side
2519 * will adapt to the incoming rate accordingly.
2520 *
2521 * We cannot throttle here if remote is Primary/SyncTarget:
2522 * we would also throttle its application reads.
2523 * In that case, throttling is done on the SyncTarget only.
2524 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002525 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002526 schedule_timeout_uninterruptible(HZ/10);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002527 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002528 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002529
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002530submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002531 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002532
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002533submit:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002534 inc_unacked(device);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002535 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002536 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002537 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002538
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002539 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002540 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002541
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002542 /* don't care for the reason here */
2543 dev_err(DEV, "submit failed, triggering re-connect\n");
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002544 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002545 list_del(&peer_req->w.list);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002546 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002547 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2548
Philipp Reisnerb411b362009-09-25 16:07:19 -07002549out_free_e:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002550 put_ldev(device);
2551 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002552 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002553}
2554
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002555static int drbd_asb_recover_0p(struct drbd_device *device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002556{
2557 int self, peer, rv = -100;
2558 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002559 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002560
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002561 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2562 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002563
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002564 ch_peer = device->p_uuid[UI_SIZE];
2565 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002566
Philipp Reisner44ed1672011-04-19 17:10:19 +02002567 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002568 after_sb_0p = rcu_dereference(first_peer_device(device)->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002569 rcu_read_unlock();
2570 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002571 case ASB_CONSENSUS:
2572 case ASB_DISCARD_SECONDARY:
2573 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002574 case ASB_VIOLENTLY:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002575 dev_err(DEV, "Configuration error.\n");
2576 break;
2577 case ASB_DISCONNECT:
2578 break;
2579 case ASB_DISCARD_YOUNGER_PRI:
2580 if (self == 0 && peer == 1) {
2581 rv = -1;
2582 break;
2583 }
2584 if (self == 1 && peer == 0) {
2585 rv = 1;
2586 break;
2587 }
2588 /* Else fall through to one of the other strategies... */
2589 case ASB_DISCARD_OLDER_PRI:
2590 if (self == 0 && peer == 1) {
2591 rv = 1;
2592 break;
2593 }
2594 if (self == 1 && peer == 0) {
2595 rv = -1;
2596 break;
2597 }
2598 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002599 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002600 "Using discard-least-changes instead\n");
2601 case ASB_DISCARD_ZERO_CHG:
2602 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002603 rv = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002604 ? -1 : 1;
2605 break;
2606 } else {
2607 if (ch_peer == 0) { rv = 1; break; }
2608 if (ch_self == 0) { rv = -1; break; }
2609 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002610 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002611 break;
2612 case ASB_DISCARD_LEAST_CHG:
2613 if (ch_self < ch_peer)
2614 rv = -1;
2615 else if (ch_self > ch_peer)
2616 rv = 1;
2617 else /* ( ch_self == ch_peer ) */
2618 /* Well, then use something else. */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002619 rv = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002620 ? -1 : 1;
2621 break;
2622 case ASB_DISCARD_LOCAL:
2623 rv = -1;
2624 break;
2625 case ASB_DISCARD_REMOTE:
2626 rv = 1;
2627 }
2628
2629 return rv;
2630}
2631
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002632static int drbd_asb_recover_1p(struct drbd_device *device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002633{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002634 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002635 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002636
Philipp Reisner44ed1672011-04-19 17:10:19 +02002637 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002638 after_sb_1p = rcu_dereference(first_peer_device(device)->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002639 rcu_read_unlock();
2640 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002641 case ASB_DISCARD_YOUNGER_PRI:
2642 case ASB_DISCARD_OLDER_PRI:
2643 case ASB_DISCARD_LEAST_CHG:
2644 case ASB_DISCARD_LOCAL:
2645 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002646 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002647 dev_err(DEV, "Configuration error.\n");
2648 break;
2649 case ASB_DISCONNECT:
2650 break;
2651 case ASB_CONSENSUS:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002652 hg = drbd_asb_recover_0p(device);
2653 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002654 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002655 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002656 rv = hg;
2657 break;
2658 case ASB_VIOLENTLY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002659 rv = drbd_asb_recover_0p(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002660 break;
2661 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002662 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002663 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002664 hg = drbd_asb_recover_0p(device);
2665 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002666 enum drbd_state_rv rv2;
2667
Philipp Reisnerb411b362009-09-25 16:07:19 -07002668 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2669 * we might be here in C_WF_REPORT_PARAMS which is transient.
2670 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002671 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002672 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002673 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002674 } else {
2675 dev_warn(DEV, "Successfully gave up primary role.\n");
2676 rv = hg;
2677 }
2678 } else
2679 rv = hg;
2680 }
2681
2682 return rv;
2683}
2684
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002685static int drbd_asb_recover_2p(struct drbd_device *device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002686{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002687 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002688 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002689
Philipp Reisner44ed1672011-04-19 17:10:19 +02002690 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002691 after_sb_2p = rcu_dereference(first_peer_device(device)->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002692 rcu_read_unlock();
2693 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002694 case ASB_DISCARD_YOUNGER_PRI:
2695 case ASB_DISCARD_OLDER_PRI:
2696 case ASB_DISCARD_LEAST_CHG:
2697 case ASB_DISCARD_LOCAL:
2698 case ASB_DISCARD_REMOTE:
2699 case ASB_CONSENSUS:
2700 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002701 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002702 dev_err(DEV, "Configuration error.\n");
2703 break;
2704 case ASB_VIOLENTLY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002705 rv = drbd_asb_recover_0p(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002706 break;
2707 case ASB_DISCONNECT:
2708 break;
2709 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002710 hg = drbd_asb_recover_0p(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002711 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002712 enum drbd_state_rv rv2;
2713
Philipp Reisnerb411b362009-09-25 16:07:19 -07002714 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2715 * we might be here in C_WF_REPORT_PARAMS which is transient.
2716 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002717 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002718 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002719 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002720 } else {
2721 dev_warn(DEV, "Successfully gave up primary role.\n");
2722 rv = hg;
2723 }
2724 } else
2725 rv = hg;
2726 }
2727
2728 return rv;
2729}
2730
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002731static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002732 u64 bits, u64 flags)
2733{
2734 if (!uuid) {
2735 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2736 return;
2737 }
2738 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2739 text,
2740 (unsigned long long)uuid[UI_CURRENT],
2741 (unsigned long long)uuid[UI_BITMAP],
2742 (unsigned long long)uuid[UI_HISTORY_START],
2743 (unsigned long long)uuid[UI_HISTORY_END],
2744 (unsigned long long)bits,
2745 (unsigned long long)flags);
2746}
2747
2748/*
2749 100 after split brain try auto recover
2750 2 C_SYNC_SOURCE set BitMap
2751 1 C_SYNC_SOURCE use BitMap
2752 0 no Sync
2753 -1 C_SYNC_TARGET use BitMap
2754 -2 C_SYNC_TARGET set BitMap
2755 -100 after split brain, disconnect
2756-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002757-1091 requires proto 91
2758-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002759 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002760static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002761{
2762 u64 self, peer;
2763 int i, j;
2764
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002765 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2766 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002767
2768 *rule_nr = 10;
2769 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2770 return 0;
2771
2772 *rule_nr = 20;
2773 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2774 peer != UUID_JUST_CREATED)
2775 return -2;
2776
2777 *rule_nr = 30;
2778 if (self != UUID_JUST_CREATED &&
2779 (peer == UUID_JUST_CREATED || peer == (u64)0))
2780 return 2;
2781
2782 if (self == peer) {
2783 int rct, dc; /* roles at crash time */
2784
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002785 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002786
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002787 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002788 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002789
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002790 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2791 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002792 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002793 drbd_uuid_move_history(device);
2794 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2795 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002796
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002797 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2798 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002799 *rule_nr = 34;
2800 } else {
2801 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2802 *rule_nr = 36;
2803 }
2804
2805 return 1;
2806 }
2807
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002808 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002809
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002810 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002811 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002812
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002813 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2814 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002815 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2816
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002817 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2818 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2819 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002820
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002821 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002822 *rule_nr = 35;
2823 } else {
2824 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2825 *rule_nr = 37;
2826 }
2827
2828 return -1;
2829 }
2830
2831 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002832 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2833 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002834 /* lowest bit is set when we were primary,
2835 * next bit (weight 2) is set when peer was primary */
2836 *rule_nr = 40;
2837
2838 switch (rct) {
2839 case 0: /* !self_pri && !peer_pri */ return 0;
2840 case 1: /* self_pri && !peer_pri */ return 1;
2841 case 2: /* !self_pri && peer_pri */ return -1;
2842 case 3: /* self_pri && peer_pri */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002843 dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002844 return dc ? -1 : 1;
2845 }
2846 }
2847
2848 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002849 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002850 if (self == peer)
2851 return -1;
2852
2853 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002854 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002855 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002856 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002857 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2858 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2859 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002860 /* The last P_SYNC_UUID did not get though. Undo the last start of
2861 resync as sync source modifications of the peer's UUIDs. */
2862
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002863 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002864 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002866 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2867 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002868
Lars Ellenberg92b4ca22012-04-30 12:53:52 +02002869 dev_info(DEV, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002870 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01002871
Philipp Reisnerb411b362009-09-25 16:07:19 -07002872 return -1;
2873 }
2874 }
2875
2876 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002877 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002878 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002879 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002880 if (self == peer)
2881 return -2;
2882 }
2883
2884 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002885 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2886 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002887 if (self == peer)
2888 return 1;
2889
2890 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002891 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002892 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002893 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002894 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2895 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2896 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002897 /* The last P_SYNC_UUID did not get though. Undo the last start of
2898 resync as sync source modifications of our UUIDs. */
2899
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002900 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002901 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002902
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002903 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
2904 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002905
Philipp Reisner4a23f262011-01-11 17:42:17 +01002906 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002907 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2908 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002909
2910 return 1;
2911 }
2912 }
2913
2914
2915 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002916 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002917 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002918 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002919 if (self == peer)
2920 return 2;
2921 }
2922
2923 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002924 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2925 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002926 if (self == peer && self != ((u64)0))
2927 return 100;
2928
2929 *rule_nr = 100;
2930 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002931 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002932 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002933 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002934 if (self == peer)
2935 return -100;
2936 }
2937 }
2938
2939 return -1000;
2940}
2941
2942/* drbd_sync_handshake() returns the new conn state on success, or
2943 CONN_MASK (-1) on failure.
2944 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002945static enum drbd_conns drbd_sync_handshake(struct drbd_device *device, enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002946 enum drbd_disk_state peer_disk) __must_hold(local)
2947{
Philipp Reisnerb411b362009-09-25 16:07:19 -07002948 enum drbd_conns rv = C_MASK;
2949 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002950 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002951 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002952
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002953 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002954 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002955 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002956
2957 dev_info(DEV, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002958
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002959 spin_lock_irq(&device->ldev->md.uuid_lock);
2960 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
2961 drbd_uuid_dump(device, "peer", device->p_uuid,
2962 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002963
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002964 hg = drbd_uuid_compare(device, &rule_nr);
2965 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002966
2967 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2968
2969 if (hg == -1000) {
2970 dev_alert(DEV, "Unrelated data, aborting!\n");
2971 return C_MASK;
2972 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002973 if (hg < -1000) {
2974 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002975 return C_MASK;
2976 }
2977
2978 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2979 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2980 int f = (hg == -100) || abs(hg) == 2;
2981 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2982 if (f)
2983 hg = hg*2;
2984 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2985 hg > 0 ? "source" : "target");
2986 }
2987
Adam Gandelman3a11a482010-04-08 16:48:23 -07002988 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002989 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07002990
Philipp Reisner44ed1672011-04-19 17:10:19 +02002991 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002992 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002993
2994 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002995 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002996 + (peer_role == R_PRIMARY);
2997 int forced = (hg == -100);
2998
2999 switch (pcount) {
3000 case 0:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003001 hg = drbd_asb_recover_0p(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003002 break;
3003 case 1:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003004 hg = drbd_asb_recover_1p(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003005 break;
3006 case 2:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003007 hg = drbd_asb_recover_2p(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008 break;
3009 }
3010 if (abs(hg) < 100) {
3011 dev_warn(DEV, "Split-Brain detected, %d primaries, "
3012 "automatically solved. Sync from %s node\n",
3013 pcount, (hg < 0) ? "peer" : "this");
3014 if (forced) {
3015 dev_warn(DEV, "Doing a full sync, since"
3016 " UUIDs where ambiguous.\n");
3017 hg = hg*2;
3018 }
3019 }
3020 }
3021
3022 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003023 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003024 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003025 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003026 hg = 1;
3027
3028 if (abs(hg) < 100)
3029 dev_warn(DEV, "Split-Brain detected, manually solved. "
3030 "Sync from %s node\n",
3031 (hg < 0) ? "peer" : "this");
3032 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003033 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003034 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003035 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003036
3037 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003038 /* FIXME this log message is not correct if we end up here
3039 * after an attempted attach on a diskless node.
3040 * We just refuse to attach -- well, we drop the "connection"
3041 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07003042 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003043 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003044 return C_MASK;
3045 }
3046
3047 if (hg > 0 && mydisk <= D_INCONSISTENT) {
3048 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
3049 return C_MASK;
3050 }
3051
3052 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003053 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003054 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003055 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003056 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003057 /* fall through */
3058 case ASB_DISCONNECT:
3059 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
3060 return C_MASK;
3061 case ASB_VIOLENTLY:
3062 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
3063 "assumption\n");
3064 }
3065 }
3066
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003067 if (tentative || test_bit(CONN_DRY_RUN, &first_peer_device(device)->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003068 if (hg == 0)
3069 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
3070 else
3071 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
3072 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3073 abs(hg) >= 2 ? "full" : "bit-map based");
3074 return C_MASK;
3075 }
3076
Philipp Reisnerb411b362009-09-25 16:07:19 -07003077 if (abs(hg) >= 2) {
3078 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003079 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003080 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003081 return C_MASK;
3082 }
3083
3084 if (hg > 0) { /* become sync source. */
3085 rv = C_WF_BITMAP_S;
3086 } else if (hg < 0) { /* become sync target */
3087 rv = C_WF_BITMAP_T;
3088 } else {
3089 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003090 if (drbd_bm_total_weight(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003091 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003092 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003093 }
3094 }
3095
3096 return rv;
3097}
3098
Philipp Reisnerf179d762011-05-16 17:31:47 +02003099static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003100{
3101 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003102 if (peer == ASB_DISCARD_REMOTE)
3103 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003104
3105 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003106 if (peer == ASB_DISCARD_LOCAL)
3107 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003108
3109 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003110 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003111}
3112
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003113static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003114{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003115 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003116 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3117 int p_proto, p_discard_my_data, p_two_primaries, cf;
3118 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3119 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003120 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003121 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003122
Philipp Reisnerb411b362009-09-25 16:07:19 -07003123 p_proto = be32_to_cpu(p->protocol);
3124 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3125 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3126 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003127 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003128 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003129 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003130
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003131 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003132 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003133
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003134 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003135 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003136 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003137 if (err)
3138 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003139 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003140 }
3141
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003142 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003143 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003144
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003145 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003146 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003147
3148 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003149 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003150
3151 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003152 conn_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003153 goto disconnect_rcu_unlock;
3154 }
3155
3156 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003157 conn_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003158 goto disconnect_rcu_unlock;
3159 }
3160
3161 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003162 conn_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003163 goto disconnect_rcu_unlock;
3164 }
3165
3166 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003167 conn_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003168 goto disconnect_rcu_unlock;
3169 }
3170
3171 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003172 conn_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003173 goto disconnect_rcu_unlock;
3174 }
3175
3176 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003177 conn_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003178 goto disconnect_rcu_unlock;
3179 }
3180
3181 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003182 conn_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003183 goto disconnect_rcu_unlock;
3184 }
3185
3186 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003187 }
3188
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003189 if (integrity_alg[0]) {
3190 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003191
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003192 /*
3193 * We can only change the peer data integrity algorithm
3194 * here. Changing our own data integrity algorithm
3195 * requires that we send a P_PROTOCOL_UPDATE packet at
3196 * the same time; otherwise, the peer has no way to
3197 * tell between which packets the algorithm should
3198 * change.
3199 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003200
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003201 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3202 if (!peer_integrity_tfm) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003203 conn_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003204 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003205 goto disconnect;
3206 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003207
3208 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3209 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3210 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3211 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003212 conn_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003213 goto disconnect;
3214 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003215 }
3216
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003217 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3218 if (!new_net_conf) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003219 conn_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003220 goto disconnect;
3221 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003222
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003223 mutex_lock(&connection->data.mutex);
3224 mutex_lock(&connection->conf_update);
3225 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003226 *new_net_conf = *old_net_conf;
3227
3228 new_net_conf->wire_protocol = p_proto;
3229 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3230 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3231 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3232 new_net_conf->two_primaries = p_two_primaries;
3233
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003234 rcu_assign_pointer(connection->net_conf, new_net_conf);
3235 mutex_unlock(&connection->conf_update);
3236 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003237
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003238 crypto_free_hash(connection->peer_integrity_tfm);
3239 kfree(connection->int_dig_in);
3240 kfree(connection->int_dig_vv);
3241 connection->peer_integrity_tfm = peer_integrity_tfm;
3242 connection->int_dig_in = int_dig_in;
3243 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003244
3245 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003246 conn_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003247 integrity_alg[0] ? integrity_alg : "(none)");
3248
3249 synchronize_rcu();
3250 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003251 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003252
Philipp Reisner44ed1672011-04-19 17:10:19 +02003253disconnect_rcu_unlock:
3254 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003255disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003256 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003257 kfree(int_dig_in);
3258 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003259 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003260 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003261}
3262
3263/* helper function
3264 * input: alg name, feature name
3265 * return: NULL (alg name was "")
3266 * ERR_PTR(error) if something goes wrong
3267 * or the crypto hash ptr, if it worked out ok. */
Rashika Kheriaf63e6312013-12-19 15:11:09 +05303268static
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003269struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003270 const char *alg, const char *name)
3271{
3272 struct crypto_hash *tfm;
3273
3274 if (!alg[0])
3275 return NULL;
3276
3277 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3278 if (IS_ERR(tfm)) {
3279 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3280 alg, name, PTR_ERR(tfm));
3281 return tfm;
3282 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003283 return tfm;
3284}
3285
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003286static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003287{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003288 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003289 int size = pi->size;
3290
3291 while (size) {
3292 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003293 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003294 if (s <= 0) {
3295 if (s < 0)
3296 return s;
3297 break;
3298 }
3299 size -= s;
3300 }
3301 if (size)
3302 return -EIO;
3303 return 0;
3304}
3305
3306/*
3307 * config_unknown_volume - device configuration command for unknown volume
3308 *
3309 * When a device is added to an existing connection, the node on which the
3310 * device is added first will send configuration commands to its peer but the
3311 * peer will not know about the device yet. It will warn and ignore these
3312 * commands. Once the device is added on the second node, the second node will
3313 * send the same device configuration commands, but in the other direction.
3314 *
3315 * (We can also end up here if drbd is misconfigured.)
3316 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003317static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003318{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003319 conn_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003320 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003321 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003322}
3323
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003324static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003325{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003326 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003327 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003328 unsigned int header_size, data_size, exp_max_sz;
3329 struct crypto_hash *verify_tfm = NULL;
3330 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003331 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003332 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003333 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003334 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003335 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003336 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003337
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003338 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003339 if (!device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003340 return config_unknown_volume(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003341
3342 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3343 : apv == 88 ? sizeof(struct p_rs_param)
3344 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003345 : apv <= 94 ? sizeof(struct p_rs_param_89)
3346 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003347
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003348 if (pi->size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003349 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003350 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003351 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003352 }
3353
3354 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003355 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003356 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003357 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003358 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003359 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003360 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003361 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003362 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003363 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003364 D_ASSERT(data_size == 0);
3365 }
3366
3367 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003368 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003369 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3370
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003371 err = drbd_recv_all(first_peer_device(device)->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003372 if (err)
3373 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003374
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003375 mutex_lock(&first_peer_device(device)->connection->conf_update);
3376 old_net_conf = first_peer_device(device)->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003377 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003378 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3379 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003380 put_ldev(device);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003381 mutex_unlock(&first_peer_device(device)->connection->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003382 dev_err(DEV, "Allocation of new disk_conf failed\n");
3383 return -ENOMEM;
3384 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003385
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003386 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003387 *new_disk_conf = *old_disk_conf;
3388
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003389 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003390 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003391
3392 if (apv >= 88) {
3393 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003394 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3395 dev_err(DEV, "verify-alg of wrong size, "
3396 "peer wants %u, accepting only up to %u byte\n",
3397 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003398 err = -EIO;
3399 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003400 }
3401
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003402 err = drbd_recv_all(first_peer_device(device)->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003403 if (err)
3404 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003405 /* we expect NUL terminated string */
3406 /* but just in case someone tries to be evil */
3407 D_ASSERT(p->verify_alg[data_size-1] == 0);
3408 p->verify_alg[data_size-1] = 0;
3409
3410 } else /* apv >= 89 */ {
3411 /* we still expect NUL terminated strings */
3412 /* but just in case someone tries to be evil */
3413 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3414 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3415 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3416 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3417 }
3418
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003419 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003420 if (device->state.conn == C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003421 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003422 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003423 goto disconnect;
3424 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003425 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003426 p->verify_alg, "verify-alg");
3427 if (IS_ERR(verify_tfm)) {
3428 verify_tfm = NULL;
3429 goto disconnect;
3430 }
3431 }
3432
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003433 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003434 if (device->state.conn == C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003435 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003436 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003437 goto disconnect;
3438 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003439 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003440 p->csums_alg, "csums-alg");
3441 if (IS_ERR(csums_tfm)) {
3442 csums_tfm = NULL;
3443 goto disconnect;
3444 }
3445 }
3446
Philipp Reisner813472c2011-05-03 16:47:02 +02003447 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003448 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3449 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3450 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3451 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003452
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003453 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003454 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003455 new_plan = fifo_alloc(fifo_size);
3456 if (!new_plan) {
Philipp Reisner778f2712010-07-06 11:14:00 +02003457 dev_err(DEV, "kmalloc of fifo_buffer failed");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003458 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003459 goto disconnect;
3460 }
3461 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003462 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003463
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003464 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003465 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3466 if (!new_net_conf) {
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003467 dev_err(DEV, "Allocation of new net_conf failed\n");
3468 goto disconnect;
3469 }
3470
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003471 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003472
3473 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003474 strcpy(new_net_conf->verify_alg, p->verify_alg);
3475 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003476 crypto_free_hash(first_peer_device(device)->connection->verify_tfm);
3477 first_peer_device(device)->connection->verify_tfm = verify_tfm;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003478 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3479 }
3480 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003481 strcpy(new_net_conf->csums_alg, p->csums_alg);
3482 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003483 crypto_free_hash(first_peer_device(device)->connection->csums_tfm);
3484 first_peer_device(device)->connection->csums_tfm = csums_tfm;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003485 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3486 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003487 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003488 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003489 }
3490
Philipp Reisner813472c2011-05-03 16:47:02 +02003491 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003492 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3493 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003494 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003495
3496 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003497 old_plan = device->rs_plan_s;
3498 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003499 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003500
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003501 mutex_unlock(&first_peer_device(device)->connection->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003502 synchronize_rcu();
3503 if (new_net_conf)
3504 kfree(old_net_conf);
3505 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003506 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003507
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003508 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003509
Philipp Reisner813472c2011-05-03 16:47:02 +02003510reconnect:
3511 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003512 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003513 kfree(new_disk_conf);
3514 }
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003515 mutex_unlock(&first_peer_device(device)->connection->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003516 return -EIO;
3517
Philipp Reisnerb411b362009-09-25 16:07:19 -07003518disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003519 kfree(new_plan);
3520 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003521 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003522 kfree(new_disk_conf);
3523 }
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003524 mutex_unlock(&first_peer_device(device)->connection->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525 /* just for completeness: actually not needed,
3526 * as this is not reached if csums_tfm was ok. */
3527 crypto_free_hash(csums_tfm);
3528 /* but free the verify_tfm again, if csums_tfm did not work out */
3529 crypto_free_hash(verify_tfm);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003530 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003531 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003532}
3533
Philipp Reisnerb411b362009-09-25 16:07:19 -07003534/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003535static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003536 const char *s, sector_t a, sector_t b)
3537{
3538 sector_t d;
3539 if (a == 0 || b == 0)
3540 return;
3541 d = (a > b) ? (a - b) : (b - a);
3542 if (d > (a>>3) || d > (b>>3))
3543 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3544 (unsigned long long)a, (unsigned long long)b);
3545}
3546
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003547static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003548{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003549 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003550 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003551 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003552 sector_t p_size, p_usize, my_usize;
3553 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003554 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003555
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003556 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003557 if (!device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003558 return config_unknown_volume(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003559
Philipp Reisnerb411b362009-09-25 16:07:19 -07003560 p_size = be64_to_cpu(p->d_size);
3561 p_usize = be64_to_cpu(p->u_size);
3562
Philipp Reisnerb411b362009-09-25 16:07:19 -07003563 /* just store the peer's disk size for now.
3564 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003565 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003566
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003567 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003568 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003569 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003570 rcu_read_unlock();
3571
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003572 warn_if_differ_considerably(device, "lower level device sizes",
3573 p_size, drbd_get_max_capacity(device->ldev));
3574 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003575 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003576
3577 /* if this is the first connect, or an otherwise expected
3578 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003579 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003580 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003581
3582 /* Never shrink a device with usable data during connect.
3583 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003584 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3585 drbd_get_capacity(device->this_bdev) &&
3586 device->state.disk >= D_OUTDATED &&
3587 device->state.conn < C_CONNECTED) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003588 dev_err(DEV, "The peer's disk size is too small!\n");
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003589 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003590 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003591 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003592 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003593
3594 if (my_usize != p_usize) {
3595 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3596
3597 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3598 if (!new_disk_conf) {
3599 dev_err(DEV, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003600 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003601 return -ENOMEM;
3602 }
3603
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003604 mutex_lock(&first_peer_device(device)->connection->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003605 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003606 *new_disk_conf = *old_disk_conf;
3607 new_disk_conf->disk_size = p_usize;
3608
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003609 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003610 mutex_unlock(&first_peer_device(device)->connection->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003611 synchronize_rcu();
3612 kfree(old_disk_conf);
3613
3614 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3615 (unsigned long)my_usize);
3616 }
3617
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003618 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003619 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003620
Philipp Reisnere89b5912010-03-24 17:11:33 +01003621 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003622 if (get_ldev(device)) {
3623 dd = drbd_determine_dev_size(device, ddsf, NULL);
3624 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003625 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003626 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003627 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003628 } else {
3629 /* I am diskless, need to accept the peer's size. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003630 drbd_set_my_capacity(device, p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003631 }
3632
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003633 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3634 drbd_reconsider_max_bio_size(device);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003635
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003636 if (get_ldev(device)) {
3637 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3638 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003639 ldsc = 1;
3640 }
3641
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003642 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003643 }
3644
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003645 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003646 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003647 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003648 /* we have different sizes, probably peer
3649 * needs to know my new size... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003650 drbd_send_sizes(device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003651 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003652 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3653 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3654 if (device->state.pdsk >= D_INCONSISTENT &&
3655 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003656 if (ddsf & DDSF_NO_RESYNC)
3657 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3658 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003659 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003660 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003661 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003662 }
3663 }
3664
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003665 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003666}
3667
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003668static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003669{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003670 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003671 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003672 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003673 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003674
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003675 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003676 if (!device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003677 return config_unknown_volume(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003678
Philipp Reisnerb411b362009-09-25 16:07:19 -07003679 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003680 if (!p_uuid) {
3681 dev_err(DEV, "kmalloc of p_uuid failed\n");
3682 return false;
3683 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003684
3685 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3686 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3687
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003688 kfree(device->p_uuid);
3689 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003690
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003691 if (device->state.conn < C_CONNECTED &&
3692 device->state.disk < D_INCONSISTENT &&
3693 device->state.role == R_PRIMARY &&
3694 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003695 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003696 (unsigned long long)device->ed_uuid);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003697 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003698 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003699 }
3700
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003701 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003702 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003703 device->state.conn == C_CONNECTED &&
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003704 first_peer_device(device)->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003705 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003706 (p_uuid[UI_FLAGS] & 8);
3707 if (skip_initial_sync) {
3708 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003709 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003710 "clear_n_write from receive_uuids",
3711 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003712 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3713 _drbd_uuid_set(device, UI_BITMAP, 0);
3714 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003715 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003716 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003717 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003718 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003719 put_ldev(device);
3720 } else if (device->state.disk < D_INCONSISTENT &&
3721 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003722 /* I am a diskless primary, the peer just created a new current UUID
3723 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003724 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003725 }
3726
3727 /* Before we test for the disk state, we should wait until an eventually
3728 ongoing cluster wide state change is finished. That is important if
3729 we are primary and are detaching from our disk. We need to see the
3730 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003731 mutex_lock(device->state_mutex);
3732 mutex_unlock(device->state_mutex);
3733 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3734 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003735
3736 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003737 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003738
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003739 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003740}
3741
3742/**
3743 * convert_state() - Converts the peer's view of the cluster state to our point of view
3744 * @ps: The state as seen by the peer.
3745 */
3746static union drbd_state convert_state(union drbd_state ps)
3747{
3748 union drbd_state ms;
3749
3750 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003751 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003752 [C_CONNECTED] = C_CONNECTED,
3753
3754 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3755 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3756 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3757 [C_VERIFY_S] = C_VERIFY_T,
3758 [C_MASK] = C_MASK,
3759 };
3760
3761 ms.i = ps.i;
3762
3763 ms.conn = c_tab[ps.conn];
3764 ms.peer = ps.role;
3765 ms.role = ps.peer;
3766 ms.pdsk = ps.disk;
3767 ms.disk = ps.pdsk;
3768 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3769
3770 return ms;
3771}
3772
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003773static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003774{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003775 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003776 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003777 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003778 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003779
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003780 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003781 if (!device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003782 return -EIO;
3783
Philipp Reisnerb411b362009-09-25 16:07:19 -07003784 mask.i = be32_to_cpu(p->mask);
3785 val.i = be32_to_cpu(p->val);
3786
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003787 if (test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003788 mutex_is_locked(device->state_mutex)) {
3789 drbd_send_sr_reply(device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003790 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003791 }
3792
3793 mask = convert_state(mask);
3794 val = convert_state(val);
3795
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003796 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
3797 drbd_send_sr_reply(device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003798
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003799 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003800
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003801 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003802}
3803
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003804static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003805{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003806 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003807 union drbd_state mask, val;
3808 enum drbd_state_rv rv;
3809
3810 mask.i = be32_to_cpu(p->mask);
3811 val.i = be32_to_cpu(p->val);
3812
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003813 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3814 mutex_is_locked(&connection->cstate_mutex)) {
3815 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003816 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003817 }
3818
3819 mask = convert_state(mask);
3820 val = convert_state(val);
3821
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003822 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3823 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003824
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003825 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003826}
3827
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003828static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003829{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003830 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003831 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003832 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003833 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003834 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003835 int rv;
3836
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003837 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003838 if (!device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003839 return config_unknown_volume(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003840
Philipp Reisnerb411b362009-09-25 16:07:19 -07003841 peer_state.i = be32_to_cpu(p->state);
3842
3843 real_peer_disk = peer_state.disk;
3844 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003845 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003846 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3847 }
3848
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003849 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003850 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003851 os = ns = drbd_read_state(device);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003852 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003853
Lars Ellenberg545752d2011-12-05 14:39:25 +01003854 /* If some other part of the code (asender thread, timeout)
3855 * already decided to close the connection again,
3856 * we must not "re-establish" it here. */
3857 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003858 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01003859
Lars Ellenberg40424e42011-09-26 15:24:56 +02003860 /* If this is the "end of sync" confirmation, usually the peer disk
3861 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3862 * set) resync started in PausedSyncT, or if the timing of pause-/
3863 * unpause-sync events has been "just right", the peer disk may
3864 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3865 */
3866 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3867 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003868 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3869 /* If we are (becoming) SyncSource, but peer is still in sync
3870 * preparation, ignore its uptodate-ness to avoid flapping, it
3871 * will change to inconsistent once the peer reaches active
3872 * syncing states.
3873 * It may have changed syncer-paused flags, however, so we
3874 * cannot ignore this completely. */
3875 if (peer_state.conn > C_CONNECTED &&
3876 peer_state.conn < C_SYNC_SOURCE)
3877 real_peer_disk = D_INCONSISTENT;
3878
3879 /* if peer_state changes to connected at the same time,
3880 * it explicitly notifies us that it finished resync.
3881 * Maybe we should finish it up, too? */
3882 else if (os.conn >= C_SYNC_SOURCE &&
3883 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003884 if (drbd_bm_total_weight(device) <= device->rs_failed)
3885 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003886 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003887 }
3888 }
3889
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003890 /* explicit verify finished notification, stop sector reached. */
3891 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3892 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003893 ov_out_of_sync_print(device);
3894 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003895 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003896 }
3897
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003898 /* peer says his disk is inconsistent, while we think it is uptodate,
3899 * and this happens while the peer still thinks we have a sync going on,
3900 * but we think we are already done with the sync.
3901 * We ignore this to avoid flapping pdsk.
3902 * This should not happen, if the peer is a recent version of drbd. */
3903 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3904 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3905 real_peer_disk = D_UP_TO_DATE;
3906
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003907 if (ns.conn == C_WF_REPORT_PARAMS)
3908 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003909
Philipp Reisner67531712010-10-27 12:21:30 +02003910 if (peer_state.conn == C_AHEAD)
3911 ns.conn = C_BEHIND;
3912
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003913 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3914 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915 int cr; /* consider resync */
3916
3917 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003918 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003919 /* if we had an established connection
3920 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003921 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003922 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003923 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003924 /* if we have both been inconsistent, and the peer has been
3925 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003926 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003927 /* if we had been plain connected, and the admin requested to
3928 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003929 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003930 (peer_state.conn >= C_STARTING_SYNC_S &&
3931 peer_state.conn <= C_WF_BITMAP_T));
3932
3933 if (cr)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003934 ns.conn = drbd_sync_handshake(device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003935
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003936 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003937 if (ns.conn == C_MASK) {
3938 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003939 if (device->state.disk == D_NEGOTIATING) {
3940 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003941 } else if (peer_state.disk == D_NEGOTIATING) {
3942 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3943 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003944 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003945 } else {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003946 if (test_and_clear_bit(CONN_DRY_RUN, &first_peer_device(device)->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003947 return -EIO;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003948 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003949 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003950 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003951 }
3952 }
3953 }
3954
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003955 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003956 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003957 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003958 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003959 ns.peer = peer_state.role;
3960 ns.pdsk = real_peer_disk;
3961 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003962 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003963 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003964 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003965 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
3966 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003967 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003968 for temporal network outages! */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003969 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003970 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003971 tl_clear(first_peer_device(device)->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003972 drbd_uuid_new_current(device);
3973 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003974 conn_request_state(first_peer_device(device)->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003975 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003976 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003977 rv = _drbd_set_state(device, ns, cs_flags, NULL);
3978 ns = drbd_read_state(device);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003979 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003980
3981 if (rv < SS_SUCCESS) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02003982 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003983 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003984 }
3985
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003986 if (os.conn > C_WF_REPORT_PARAMS) {
3987 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003988 peer_state.disk != D_NEGOTIATING ) {
3989 /* we want resync, peer has not yet decided to sync... */
3990 /* Nowadays only used when forcing a node into primary role and
3991 setting its disk to UpToDate with that */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003992 drbd_send_uuids(device);
3993 drbd_send_current_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003994 }
3995 }
3996
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003997 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003998
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003999 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004000
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004001 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004002}
4003
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004004static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004005{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004006 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004007 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004008
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004009 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004010 if (!device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004011 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004012
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004013 wait_event(device->misc_wait,
4014 device->state.conn == C_WF_SYNC_UUID ||
4015 device->state.conn == C_BEHIND ||
4016 device->state.conn < C_CONNECTED ||
4017 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004018
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004019 /* D_ASSERT( device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004020
Philipp Reisnerb411b362009-09-25 16:07:19 -07004021 /* Here the _drbd_uuid_ functions are right, current should
4022 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004023 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4024 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4025 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004026
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004027 drbd_print_uuids(device, "updated sync uuid");
4028 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004029
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004030 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004031 } else
4032 dev_err(DEV, "Ignoring SyncUUID packet!\n");
4033
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004034 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004035}
4036
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004037/**
4038 * receive_bitmap_plain
4039 *
4040 * Return 0 when done, 1 when another iteration is needed, and a negative error
4041 * code upon failure.
4042 */
4043static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004044receive_bitmap_plain(struct drbd_device *device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004045 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004046{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004047 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004048 drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004049 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004050 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004051 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004052 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004053
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004054 if (want != size) {
4055 dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004056 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004057 }
4058 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004059 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004060 err = drbd_recv_all(first_peer_device(device)->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004061 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004062 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004063
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004064 drbd_bm_merge_lel(device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004065
4066 c->word_offset += num_words;
4067 c->bit_offset = c->word_offset * BITS_PER_LONG;
4068 if (c->bit_offset > c->bm_bits)
4069 c->bit_offset = c->bm_bits;
4070
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004071 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004072}
4073
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004074static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4075{
4076 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4077}
4078
4079static int dcbp_get_start(struct p_compressed_bm *p)
4080{
4081 return (p->encoding & 0x80) != 0;
4082}
4083
4084static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4085{
4086 return (p->encoding >> 4) & 0x7;
4087}
4088
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004089/**
4090 * recv_bm_rle_bits
4091 *
4092 * Return 0 when done, 1 when another iteration is needed, and a negative error
4093 * code upon failure.
4094 */
4095static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004096recv_bm_rle_bits(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004097 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004098 struct bm_xfer_ctx *c,
4099 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004100{
4101 struct bitstream bs;
4102 u64 look_ahead;
4103 u64 rl;
4104 u64 tmp;
4105 unsigned long s = c->bit_offset;
4106 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004107 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004108 int have;
4109 int bits;
4110
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004111 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004112
4113 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4114 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004115 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004116
4117 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4118 bits = vli_decode_bits(&rl, look_ahead);
4119 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004120 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004121
4122 if (toggle) {
4123 e = s + rl -1;
4124 if (e >= c->bm_bits) {
4125 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004126 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004127 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004128 _drbd_bm_set_bits(device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129 }
4130
4131 if (have < bits) {
4132 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
4133 have, bits, look_ahead,
4134 (unsigned int)(bs.cur.b - p->code),
4135 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004136 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004137 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004138 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4139 if (likely(bits < 64))
4140 look_ahead >>= bits;
4141 else
4142 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004143 have -= bits;
4144
4145 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4146 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004147 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004148 look_ahead |= tmp << have;
4149 have += bits;
4150 }
4151
4152 c->bit_offset = s;
4153 bm_xfer_ctx_bit_to_word_offset(c);
4154
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004155 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004156}
4157
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004158/**
4159 * decode_bitmap_c
4160 *
4161 * Return 0 when done, 1 when another iteration is needed, and a negative error
4162 * code upon failure.
4163 */
4164static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004165decode_bitmap_c(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004166 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004167 struct bm_xfer_ctx *c,
4168 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004169{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004170 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004171 return recv_bm_rle_bits(device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004172
4173 /* other variants had been implemented for evaluation,
4174 * but have been dropped as this one turned out to be "best"
4175 * during all our tests. */
4176
4177 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004178 conn_request_state(first_peer_device(device)->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004179 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004180}
4181
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004182void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004183 const char *direction, struct bm_xfer_ctx *c)
4184{
4185 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004186 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004187 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4188 unsigned int plain =
4189 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4190 c->bm_words * sizeof(unsigned long);
4191 unsigned int total = c->bytes[0] + c->bytes[1];
4192 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004193
4194 /* total can not be zero. but just in case: */
4195 if (total == 0)
4196 return;
4197
4198 /* don't report if not compressed */
4199 if (total >= plain)
4200 return;
4201
4202 /* total < plain. check for overflow, still */
4203 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4204 : (1000 * total / plain);
4205
4206 if (r > 1000)
4207 r = 1000;
4208
4209 r = 1000 - r;
4210 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4211 "total %u; compression: %u.%u%%\n",
4212 direction,
4213 c->bytes[1], c->packets[1],
4214 c->bytes[0], c->packets[0],
4215 total, r/10, r % 10);
4216}
4217
4218/* Since we are processing the bitfield from lower addresses to higher,
4219 it does not matter if the process it in 32 bit chunks or 64 bit
4220 chunks as long as it is little endian. (Understand it as byte stream,
4221 beginning with the lowest byte...) If we would use big endian
4222 we would need to process it from the highest address to the lowest,
4223 in order to be agnostic to the 32 vs 64 bits issue.
4224
4225 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004226static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004227{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004228 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004229 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004230 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004231
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004232 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004233 if (!device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004234 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004235
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004236 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004237 /* you are supposed to send additional out-of-sync information
4238 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004239
Philipp Reisnerb411b362009-09-25 16:07:19 -07004240 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004241 .bm_bits = drbd_bm_bits(device),
4242 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004243 };
4244
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004245 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004246 if (pi->cmd == P_BITMAP)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004247 err = receive_bitmap_plain(device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004248 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004249 /* MAYBE: sanity check that we speak proto >= 90,
4250 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004251 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004252
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004253 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254 dev_err(DEV, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004255 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004256 goto out;
4257 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004258 if (pi->size <= sizeof(*p)) {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004259 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004260 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004261 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004262 }
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004263 err = drbd_recv_all(first_peer_device(device)->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004264 if (err)
4265 goto out;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004266 err = decode_bitmap_c(device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004267 } else {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004268 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004269 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004270 goto out;
4271 }
4272
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004273 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004274 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004275
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004276 if (err <= 0) {
4277 if (err < 0)
4278 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004279 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004280 }
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004281 err = drbd_recv_header(first_peer_device(device)->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004282 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004283 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004284 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004286 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004287
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004288 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004289 enum drbd_state_rv rv;
4290
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004291 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004292 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004293 goto out;
4294 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004295 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004296 D_ASSERT(rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004297 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004298 /* admin may have requested C_DISCONNECTING,
4299 * other threads may have noticed network errors */
4300 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004301 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004302 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004303 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004304
Philipp Reisnerb411b362009-09-25 16:07:19 -07004305 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004306 drbd_bm_unlock(device);
4307 if (!err && device->state.conn == C_WF_BITMAP_S)
4308 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004309 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310}
4311
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004312static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004313{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004314 conn_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004315 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004316
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004317 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004318}
4319
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004320static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004321{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322 /* Make sure we've acked all the TCP data associated
4323 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004324 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004325
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004326 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004327}
4328
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004329static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004330{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004331 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004332 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004333
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004334 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004335 if (!device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004336 return -EIO;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004337
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004338 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004339 case C_WF_SYNC_UUID:
4340 case C_WF_BITMAP_T:
4341 case C_BEHIND:
4342 break;
4343 default:
4344 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004345 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004346 }
4347
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004348 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004349
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004350 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004351}
4352
/* One entry of the receiver's packet dispatch table, indexed by packet type. */
struct data_cmd {
	/* non-zero if data may follow the fixed-size sub header */
	int expect_payload;
	/* size of the fixed sub header read before calling fn; 0 for none */
	size_t pkt_size;
	/* packet handler; a non-zero return is treated as an error by drbdd() */
	int (*fn)(struct drbd_connection *connection, struct packet_info *pi);
};
4358
Philipp Reisner02918be2010-08-20 14:35:10 +02004359static struct data_cmd drbd_cmd_handler[] = {
4360 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4361 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4362 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4363 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004364 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4365 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4366 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004367 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4368 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004369 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4370 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004371 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4372 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4373 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4374 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4375 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4376 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4377 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4378 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4379 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4380 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004381 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004382 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004383 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Philipp Reisner02918be2010-08-20 14:35:10 +02004384};
4385
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004386static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004387{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004388 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004389 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004390 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004391
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004392 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004393 struct data_cmd *cmd;
4394
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004395 drbd_thread_current_set_cpu(&connection->receiver);
4396 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004397 goto err_out;
4398
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004399 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004400 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004401 conn_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004402 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004403 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004404 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004405
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004406 shs = cmd->pkt_size;
4407 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004408 conn_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004409 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004410 goto err_out;
4411 }
4412
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004413 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004414 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004415 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004416 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004417 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004418 }
4419
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004420 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004421 if (err) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004422 conn_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004423 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004424 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004425 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004426 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004427 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004428
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004429 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004430 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004431}
4432
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004433void conn_flush_workqueue(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004434{
4435 struct drbd_wq_barrier barr;
4436
4437 barr.w.cb = w_prev_work_done;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004438 barr.w.connection = connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004439 init_completion(&barr.done);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004440 drbd_queue_work(&connection->sender_work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004441 wait_for_completion(&barr.done);
4442}
4443
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004444static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004445{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004446 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004447 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004448 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004449
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004450 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004451 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004452
Lars Ellenberg545752d2011-12-05 14:39:25 +01004453 /* We are about to start the cleanup after connection loss.
4454 * Make sure drbd_make_request knows about that.
4455 * Usually we should be in some network failure state already,
4456 * but just in case we are not, we fix it up here.
4457 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004458 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004459
Philipp Reisnerb411b362009-09-25 16:07:19 -07004460 /* asender does not clean up anything. it must not interfere, either */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004461 drbd_thread_stop(&connection->asender);
4462 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004463
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004464 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004465 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4466 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004467 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004468 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004469 drbd_disconnected(device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004470 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004471 rcu_read_lock();
4472 }
4473 rcu_read_unlock();
4474
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004475 if (!list_empty(&connection->current_epoch->list))
4476 conn_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004477 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004478 atomic_set(&connection->current_epoch->epoch_size, 0);
4479 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004480
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004481 conn_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004482
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004483 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4484 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004485
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004486 spin_lock_irq(&connection->req_lock);
4487 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004488 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004489 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004490
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004491 spin_unlock_irq(&connection->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004492
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004493 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004494 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004495}
4496
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004497static int drbd_disconnected(struct drbd_device *device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004498{
Philipp Reisner360cc742011-02-08 14:29:53 +01004499 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004500
Philipp Reisner85719572010-07-21 10:20:17 +02004501 /* wait for current activity to cease. */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004502 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004503 _drbd_wait_ee_list_empty(device, &device->active_ee);
4504 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4505 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004506 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004507
4508 /* We do not have data structures that would allow us to
4509 * get the rs_pending_cnt down to 0 again.
4510 * * On C_SYNC_TARGET we do not have any data structures describing
4511 * the pending RSDataRequest's we have sent.
4512 * * On C_SYNC_SOURCE there is no data structure that tracks
4513 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4514 * And no, it is not the sum of the reference counts in the
4515 * resync_LRU. The resync_LRU tracks the whole operation including
4516 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4517 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004518 drbd_rs_cancel_all(device);
4519 device->rs_total = 0;
4520 device->rs_failed = 0;
4521 atomic_set(&device->rs_pending_cnt, 0);
4522 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004523
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004524 del_timer_sync(&device->resync_timer);
4525 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004526
Philipp Reisnerb411b362009-09-25 16:07:19 -07004527 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4528 * w_make_resync_request etc. which may still be on the worker queue
4529 * to be "canceled" */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004530 drbd_flush_workqueue(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004531
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004532 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004533
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004534 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4535 might have issued a work again. The one before drbd_finish_peer_reqs() is
4536 necessary to reclain net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004537 drbd_flush_workqueue(device);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004538
Lars Ellenberg08332d72012-08-17 15:09:13 +02004539 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4540 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004541 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004542
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004543 kfree(device->p_uuid);
4544 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004545
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004546 if (!drbd_suspended(device))
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004547 tl_clear(first_peer_device(device)->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004548
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004549 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004550
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004551 /* serialize with bitmap writeout triggered by the state change,
4552 * if any. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004553 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004554
Philipp Reisnerb411b362009-09-25 16:07:19 -07004555 /* tcp_close and release of sendpage pages can be deferred. I don't
4556 * want to use SO_LINGER, because apparently it can be deferred for
4557 * more than 20 seconds (longest time I checked).
4558 *
4559 * Actually we don't care for exactly when the network stack does its
4560 * put_page(), but release our reference on these pages right here.
4561 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004562 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004563 if (i)
4564 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004565 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004566 if (i)
4567 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004568 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004569 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004570 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004571
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004572 D_ASSERT(list_empty(&device->read_ee));
4573 D_ASSERT(list_empty(&device->active_ee));
4574 D_ASSERT(list_empty(&device->sync_ee));
4575 D_ASSERT(list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004576
Philipp Reisner360cc742011-02-08 14:29:53 +01004577 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004578}
4579
4580/*
4581 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4582 * we can agree on is stored in agreed_pro_version.
4583 *
4584 * feature flags and the reserved array should be enough room for future
4585 * enhancements of the handshake protocol, and possible plugins...
4586 *
4587 * for now, they are expected to be zero, but ignored.
4588 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004589static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004590{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004591 struct drbd_socket *sock;
4592 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004593
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004594 sock = &connection->data;
4595 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004596 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004597 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004598 memset(p, 0, sizeof(*p));
4599 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4600 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004601 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004602}
4603
4604/*
4605 * return values:
4606 * 1 yes, we have a valid connection
4607 * 0 oops, did not work out, please try again
4608 * -1 peer talks different language,
4609 * no point in trying again, please go standalone.
4610 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004611static int drbd_do_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004612{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004613 /* ASSERT current == connection->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004614 struct p_connection_features *p;
4615 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004616 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004617 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004618
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004619 err = drbd_send_features(connection);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004620 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004621 return 0;
4622
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004623 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004624 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625 return 0;
4626
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004627 if (pi.cmd != P_CONNECTION_FEATURES) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004628 conn_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004629 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004630 return -1;
4631 }
4632
Philipp Reisner77351055b2011-02-07 17:24:26 +01004633 if (pi.size != expect) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004634 conn_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004635 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004636 return -1;
4637 }
4638
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004639 p = pi.data;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004640 err = drbd_recv_all_warn(connection, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004641 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004642 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004643
Philipp Reisnerb411b362009-09-25 16:07:19 -07004644 p->protocol_min = be32_to_cpu(p->protocol_min);
4645 p->protocol_max = be32_to_cpu(p->protocol_max);
4646 if (p->protocol_max == 0)
4647 p->protocol_max = p->protocol_min;
4648
4649 if (PRO_VERSION_MAX < p->protocol_min ||
4650 PRO_VERSION_MIN > p->protocol_max)
4651 goto incompat;
4652
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004653 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004655 conn_info(connection, "Handshake successful: "
4656 "Agreed network protocol version %d\n", connection->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004657
4658 return 1;
4659
4660 incompat:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004661 conn_err(connection, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004662 "I support %d-%d, peer supports %d-%d\n",
4663 PRO_VERSION_MIN, PRO_VERSION_MAX,
4664 p->protocol_min, p->protocol_max);
4665 return -1;
4666}
4667
4668#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Variant of drbd_do_auth() compiled when neither CONFIG_CRYPTO_HMAC nor
 * CONFIG_CRYPTO_HMAC_MODULE is defined: logs two error lines and
 * returns -1.
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	conn_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");

	return -1;
}
4675#else
4676#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004677
4678/* Return value:
4679 1 - auth succeeded,
4680 0 - failed, try again (network error),
4681 -1 - auth failed, don't try again.
4682*/
4683
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004684static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004685{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004686 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004687 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4688 struct scatterlist sg;
4689 char *response = NULL;
4690 char *right_response = NULL;
4691 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004692 unsigned int key_len;
4693 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004694 unsigned int resp_size;
4695 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004696 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004697 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004698 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004699
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004700 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4701
Philipp Reisner44ed1672011-04-19 17:10:19 +02004702 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004703 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02004704 key_len = strlen(nc->shared_secret);
4705 memcpy(secret, nc->shared_secret, key_len);
4706 rcu_read_unlock();
4707
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004708 desc.tfm = connection->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004709 desc.flags = 0;
4710
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004711 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004712 if (rv) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004713 conn_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004714 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004715 goto fail;
4716 }
4717
4718 get_random_bytes(my_challenge, CHALLENGE_LEN);
4719
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004720 sock = &connection->data;
4721 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004722 rv = 0;
4723 goto fail;
4724 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004725 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004726 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004727 if (!rv)
4728 goto fail;
4729
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004730 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004731 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004732 rv = 0;
4733 goto fail;
4734 }
4735
Philipp Reisner77351055b2011-02-07 17:24:26 +01004736 if (pi.cmd != P_AUTH_CHALLENGE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004737 conn_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004738 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004739 rv = 0;
4740 goto fail;
4741 }
4742
Philipp Reisner77351055b2011-02-07 17:24:26 +01004743 if (pi.size > CHALLENGE_LEN * 2) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004744 conn_err(connection, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004745 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004746 goto fail;
4747 }
4748
Philipp Reisner77351055b2011-02-07 17:24:26 +01004749 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004750 if (peers_ch == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004751 conn_err(connection, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004752 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004753 goto fail;
4754 }
4755
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004756 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004757 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004758 rv = 0;
4759 goto fail;
4760 }
4761
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004762 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004763 response = kmalloc(resp_size, GFP_NOIO);
4764 if (response == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004765 conn_err(connection, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004766 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004767 goto fail;
4768 }
4769
4770 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004771 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004772
4773 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4774 if (rv) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004775 conn_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004776 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004777 goto fail;
4778 }
4779
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004780 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004781 rv = 0;
4782 goto fail;
4783 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004784 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004785 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004786 if (!rv)
4787 goto fail;
4788
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004789 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004790 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004791 rv = 0;
4792 goto fail;
4793 }
4794
Philipp Reisner77351055b2011-02-07 17:24:26 +01004795 if (pi.cmd != P_AUTH_RESPONSE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004796 conn_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004797 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004798 rv = 0;
4799 goto fail;
4800 }
4801
Philipp Reisner77351055b2011-02-07 17:24:26 +01004802 if (pi.size != resp_size) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004803 conn_err(connection, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004804 rv = 0;
4805 goto fail;
4806 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004807
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004808 err = drbd_recv_all_warn(connection, response , resp_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004809 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004810 rv = 0;
4811 goto fail;
4812 }
4813
4814 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004815 if (right_response == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004816 conn_err(connection, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004817 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004818 goto fail;
4819 }
4820
4821 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4822
4823 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4824 if (rv) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004825 conn_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004826 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004827 goto fail;
4828 }
4829
4830 rv = !memcmp(response, right_response, resp_size);
4831
4832 if (rv)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004833 conn_info(connection, "Peer authenticated using %d bytes HMAC\n",
Philipp Reisner44ed1672011-04-19 17:10:19 +02004834 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004835 else
4836 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004837
4838 fail:
4839 kfree(peers_ch);
4840 kfree(response);
4841 kfree(right_response);
4842
4843 return rv;
4844}
4845#endif
4846
4847int drbdd_init(struct drbd_thread *thi)
4848{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004849 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004850 int h;
4851
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004852 conn_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004853
4854 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004855 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004856 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004857 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004858 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004859 }
4860 if (h == -1) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004861 conn_warn(connection, "Discarding network configuration.\n");
4862 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004863 }
4864 } while (h == 0);
4865
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004866 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004867 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004868
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004869 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004870
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004871 conn_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004872 return 0;
4873}
4874
4875/* ********* acknowledge sender ******** */
4876
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004877static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004878{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004879 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004880 int retcode = be32_to_cpu(p->retcode);
4881
4882 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004883 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004884 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004885 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
4886 conn_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004887 drbd_set_st_err_str(retcode), retcode);
4888 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004889 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004890
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004891 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004892}
4893
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004894static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004895{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004896 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004897 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004898 int retcode = be32_to_cpu(p->retcode);
4899
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004900 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004901 if (!device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004902 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004903
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004904 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
4905 D_ASSERT(connection->agreed_pro_version < 100);
4906 return got_conn_RqSReply(connection, pi);
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01004907 }
4908
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004909 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004910 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004911 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004912 set_bit(CL_ST_CHG_FAIL, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004913 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004914 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004915 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004916 wake_up(&device->state_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004917
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004918 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004919}
4920
/* A ping came in: answer with a ping ack.  @pi is not looked at. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	int err = drbd_send_ping_ack(connection);

	return err;
}
4926
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004927static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004928{
4929 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004930 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
4931 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
4932 wake_up(&connection->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004933
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004934 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004935}
4936
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004937static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004938{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004939 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004940 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004941 sector_t sector = be64_to_cpu(p->sector);
4942 int blksize = be32_to_cpu(p->blksize);
4943
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004944 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004945 if (!device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004946 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004947
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004948 D_ASSERT(first_peer_device(device)->connection->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004949
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004950 update_peer_seq(device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004951
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004952 if (get_ldev(device)) {
4953 drbd_rs_complete_io(device, sector);
4954 drbd_set_in_sync(device, sector, blksize);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004955 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004956 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4957 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004958 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004959 dec_rs_pending(device);
4960 atomic_add(blksize >> 9, &device->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004961
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004962 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004963}
4964
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004965static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004966validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004967 struct rb_root *root, const char *func,
4968 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004969{
4970 struct drbd_request *req;
4971 struct bio_and_error m;
4972
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004973 spin_lock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004974 req = find_request(device, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004975 if (unlikely(!req)) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004976 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004977 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004978 }
4979 __req_mod(req, what, &m);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004980 spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004981
4982 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004983 complete_master_bio(device, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004984 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004985}
4986
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004987static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004988{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004989 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004990 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004991 sector_t sector = be64_to_cpu(p->sector);
4992 int blksize = be32_to_cpu(p->blksize);
4993 enum drbd_req_event what;
4994
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004995 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004996 if (!device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004997 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004998
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004999 update_peer_seq(device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005000
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005001 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005002 drbd_set_in_sync(device, sector, blksize);
5003 dec_rs_pending(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005004 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005005 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005006 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005007 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005008 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005009 break;
5010 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005011 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005012 break;
5013 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005014 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005015 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005016 case P_SUPERSEDED:
5017 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005018 break;
5019 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005020 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005021 break;
5022 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005023 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005024 }
5025
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005026 return validate_req_change_req_state(device, p->block_id, sector,
5027 &device->write_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005028 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005029}
5030
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005031static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005032{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005033 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005034 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005035 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005036 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005037 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005038
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005039 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005040 if (!device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005041 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005042
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005043 update_peer_seq(device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005044
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005045 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005046 dec_rs_pending(device);
5047 drbd_rs_failed_io(device, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005048 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005049 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005050
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005051 err = validate_req_change_req_state(device, p->block_id, sector,
5052 &device->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005053 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005054 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005055 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5056 The master bio might already be completed, therefore the
5057 request is no longer in the collision hash. */
5058 /* In Protocol B we might already have got a P_RECV_ACK
5059 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005060 drbd_set_out_of_sync(device, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005061 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005062 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005063}
5064
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005065static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005066{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005067 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005068 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005069 sector_t sector = be64_to_cpu(p->sector);
5070
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005071 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005072 if (!device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005073 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005074
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005075 update_peer_seq(device, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005076
Philipp Reisner380207d2011-11-11 12:31:20 +01005077 dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005078 (unsigned long long)sector, be32_to_cpu(p->blksize));
5079
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005080 return validate_req_change_req_state(device, p->block_id, sector,
5081 &device->read_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005082 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005083}
5084
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005085static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005086{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005087 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005088 sector_t sector;
5089 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005090 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005091
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005092 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005093 if (!device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005094 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005095
5096 sector = be64_to_cpu(p->sector);
5097 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005098
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005099 update_peer_seq(device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005100
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005101 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005102
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005103 if (get_ldev_if_state(device, D_FAILED)) {
5104 drbd_rs_complete_io(device, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005105 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005106 case P_NEG_RS_DREPLY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005107 drbd_rs_failed_io(device, sector, size);
Philipp Reisnerd612d302010-12-27 10:53:28 +01005108 case P_RS_CANCEL:
5109 break;
5110 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005111 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005112 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005113 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005114 }
5115
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005116 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005117}
5118
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005119static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005120{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005121 struct p_barrier_ack *p = pi->data;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005122 struct drbd_peer_device *peer_device;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005123 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005124
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005125 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005126
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005127 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005128 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5129 struct drbd_device *device = peer_device->device;
5130
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005131 if (device->state.conn == C_AHEAD &&
5132 atomic_read(&device->ap_in_flight) == 0 &&
5133 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5134 device->start_resync_timer.expires = jiffies + HZ;
5135 add_timer(&device->start_resync_timer);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005136 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005137 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005138 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005139
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005140 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005141}
5142
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005143static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005144{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005145 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005146 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005147 struct drbd_work *w;
5148 sector_t sector;
5149 int size;
5150
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005151 device = vnr_to_device(connection, pi->vnr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005152 if (!device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005153 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005154
Philipp Reisnerb411b362009-09-25 16:07:19 -07005155 sector = be64_to_cpu(p->sector);
5156 size = be32_to_cpu(p->blksize);
5157
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005158 update_peer_seq(device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005159
5160 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005161 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005162 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005163 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005164
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005165 if (!get_ldev(device))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005166 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005167
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005168 drbd_rs_complete_io(device, sector);
5169 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005170
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005171 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005172
5173 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005174 if ((device->ov_left & 0x200) == 0x200)
5175 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005176
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005177 if (device->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005178 w = kmalloc(sizeof(*w), GFP_NOIO);
5179 if (w) {
5180 w->cb = w_ov_finished;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005181 w->device = device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02005182 drbd_queue_work(&first_peer_device(device)->connection->sender_work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005183 } else {
5184 dev_err(DEV, "kmalloc(w) failed.");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005185 ov_out_of_sync_print(device);
5186 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005187 }
5188 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005189 put_ldev(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005190 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005191}
5192
/*
 * Handler for meta packets that are received but deliberately ignored
 * (asender_tbl uses it for P_DELAY_PROBE).  Both arguments are unused.
 *
 * Always returns 0.
 */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}
5197
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005198static int connection_finish_peer_reqs(struct drbd_connection *connection)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005199{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005200 struct drbd_peer_device *peer_device;
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005201 int vnr, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01005202
5203 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005204 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisner32862ec2011-02-08 16:41:01 +01005205 flush_signals(current);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005206
5207 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005208 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5209 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005210 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005211 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005212 if (drbd_finish_peer_reqs(device)) {
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02005213 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005214 return 1;
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005215 }
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02005216 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005217 rcu_read_lock();
Philipp Reisner082a3432011-03-15 16:05:42 +01005218 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005219 set_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01005220
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005221 spin_lock_irq(&connection->req_lock);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005222 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5223 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005224 not_empty = !list_empty(&device->done_ee);
Philipp Reisner082a3432011-03-15 16:05:42 +01005225 if (not_empty)
5226 break;
5227 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005228 spin_unlock_irq(&connection->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005229 rcu_read_unlock();
Philipp Reisner32862ec2011-02-08 16:41:01 +01005230 } while (not_empty);
5231
5232 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005233}
5234
5235struct asender_cmd {
5236 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005237 int (*fn)(struct drbd_connection *connection, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005238};
5239
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005240static struct asender_cmd asender_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005241 [P_PING] = { 0, got_Ping },
5242 [P_PING_ACK] = { 0, got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005243 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5244 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5245 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005246 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005247 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5248 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005249 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005250 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5251 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5252 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5253 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02005254 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005255 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5256 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5257 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005258};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005259
5260int drbd_asender(struct drbd_thread *thi)
5261{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005262 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005263 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005264 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005265 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005266 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005267 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005268 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005269 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005270 bool ping_timeout_active = false;
5271 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005272 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005273 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005274
Philipp Reisner3990e042013-03-27 14:08:48 +01005275 rv = sched_setscheduler(current, SCHED_RR, &param);
5276 if (rv < 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005277 conn_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005278
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005279 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005280 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005281
5282 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005283 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005284 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005285 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005286 ping_int = nc->ping_int;
5287 rcu_read_unlock();
5288
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005289 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5290 if (drbd_send_ping(connection)) {
5291 conn_err(connection, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005292 goto reconnect;
5293 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005294 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005295 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005296 }
5297
Philipp Reisner32862ec2011-02-08 16:41:01 +01005298 /* TODO: conditionally cork; it may hurt latency if we cork without
5299 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005300 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005301 drbd_tcp_cork(connection->meta.socket);
5302 if (connection_finish_peer_reqs(connection)) {
5303 conn_err(connection, "connection_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005304 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005305 }
5306 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005307 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005308 drbd_tcp_uncork(connection->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005309
5310 /* short circuit, recv_msg would return EINTR anyways. */
5311 if (signal_pending(current))
5312 continue;
5313
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005314 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5315 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005316
5317 flush_signals(current);
5318
5319 /* Note:
5320 * -EINTR (on meta) we got a signal
5321 * -EAGAIN (on meta) rcvtimeo expired
5322 * -ECONNRESET other side closed the connection
5323 * -ERESTARTSYS (on data) we got a signal
5324 * rv < 0 other than above: unexpected error!
5325 * rv == expected: full header or command
5326 * rv < expected: "woken" by signal during receive
5327 * rv == 0 : "connection shut down by peer"
5328 */
5329 if (likely(rv > 0)) {
5330 received += rv;
5331 buf += rv;
5332 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005333 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005334 long t;
5335 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005336 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005337 rcu_read_unlock();
5338
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005339 t = wait_event_timeout(connection->ping_wait,
5340 connection->cstate < C_WF_REPORT_PARAMS,
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005341 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005342 if (t)
5343 break;
5344 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005345 conn_err(connection, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005346 goto reconnect;
5347 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005348 /* If the data socket received something meanwhile,
5349 * that is good enough: peer is still alive. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005350 if (time_after(connection->last_received,
5351 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005352 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005353 if (ping_timeout_active) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005354 conn_err(connection, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005355 goto reconnect;
5356 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005357 set_bit(SEND_PING, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005358 continue;
5359 } else if (rv == -EINTR) {
5360 continue;
5361 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005362 conn_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005363 goto reconnect;
5364 }
5365
5366 if (received == expect && cmd == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005367 if (decode_header(connection, connection->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005368 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005369 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005370 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005371 conn_err(connection, "Unexpected meta packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005372 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005373 goto disconnect;
5374 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005375 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005376 if (pi.size != expect - header_size) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005377 conn_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005378 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005379 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005380 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005381 }
5382 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005383 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005384
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005385 err = cmd->fn(connection, &pi);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005386 if (err) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005387 conn_err(connection, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005388 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005389 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005390
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005391 connection->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005392
Philipp Reisner44ed1672011-04-19 17:10:19 +02005393 if (cmd == &asender_tbl[P_PING_ACK]) {
5394 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005395 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005396 ping_timeout_active = false;
5397 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005398
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005399 buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005400 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005401 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005402 cmd = NULL;
5403 }
5404 }
5405
5406 if (0) {
5407reconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005408 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5409 conn_md_sync(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005410 }
5411 if (0) {
5412disconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005413 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005414 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005415 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005416
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005417 conn_info(connection, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005418
5419 return 0;
5420}