blob: 1d9a99c031fad711ec80dc463a8bc20a72322f9c [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020047#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070048#include "drbd_req.h"
49
50#include "drbd_vli.h"
51
/* Decoded form of a received packet header.
 * NOTE(review): field semantics inferred from names/types — the header
 * decoding itself is not visible in this chunk; confirm against the
 * header-parsing code. */
struct packet_info {
	enum drbd_packet cmd;	/* packet type */
	unsigned int size;	/* presumably payload size in bytes */
	unsigned int vnr;	/* presumably volume number addressed */
	void *data;		/* points into the receive buffer */
};
58
/* Possible outcomes when trying to finish a write epoch
 * (returned by drbd_may_finish_epoch(); see its definition for the
 * exact conditions behind each value). */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
64
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020065static int drbd_do_features(struct drbd_connection *connection);
66static int drbd_do_auth(struct drbd_connection *connection);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +020067static int drbd_disconnected(struct drbd_peer_device *);
Philipp Reisnerb411b362009-09-25 16:07:19 -070068
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020069static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010070static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070071
Philipp Reisnerb411b362009-09-25 16:07:19 -070072
73#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
74
Lars Ellenberg45bb9122010-05-14 17:10:48 +020075/*
76 * some helper functions to deal with single linked page lists,
77 * page->private being our "next" pointer.
78 */
79
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk n pages into the chain.  If the chain is shorter than n,
	 * bail out without having touched *head. */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head.
	 * tmp is the first page NOT taken off (may be NULL if the
	 * chain was exactly n pages long). */
	page = *head;
	*head = tmp;
	return page;
}
114
115/* may be used outside of locks to find the tail of a (usually short)
116 * "private" page chain, before adding it back to a global chain head
117 * with page_chain_add() under a spinlock. */
118static struct page *page_chain_tail(struct page *page, int *len)
119{
120 struct page *tmp;
121 int i = 1;
122 while ((tmp = page_chain_next(page)))
123 ++i, page = tmp;
124 if (len)
125 *len = i;
126 return page;
127}
128
129static int page_chain_free(struct page *page)
130{
131 struct page *tmp;
132 int i = 0;
133 page_chain_for_each_safe(page, tmp) {
134 put_page(page);
135 ++i;
136 }
137 return i;
138}
139
/* Prepend the chain [chain_first .. chain_last] to *head.
 * The caller is responsible for holding whatever lock protects *head
 * (the callers in this file hold drbd_pp_lock). */
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* paranoia: verify that chain_last really is the tail of chain_first */
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
153
/* Try to get @number pages, first from the global drbd_pp_pool, then by
 * allocating fresh pages one by one.  Returns a page chain linked via
 * page->private, or NULL if not all @number pages could be obtained
 * (a partial allocation is given back to the global pool). */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* link the new page in front of the chain built so far */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
199
/* Move all already-finished peer requests from device->net_ee onto
 * @to_be_freed.  The callers in this file hold resource->req_lock
 * around this. */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_safe(le, tle, &device->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, dw.w.list);
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}
218
/* Collect finished net_ee entries under the req_lock, then free them
 * outside of it (freeing may take other locks / sleep-averse paths). */
static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, dw.w.list)
		drbd_free_net_peer_req(device, peer_req);
}
231
/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device: DRBD peer device; its device's page accounting is used.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private, or NULL if @retry is
 * false or a signal interrupted the wait.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	int mxb;

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		/* reclaiming finished net buffers may return pages to the pool */
		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* sleep until woken via drbd_pp_wait (pages were freed) */
		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the pages only on success */
	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
290
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system.
 * @is_net selects which in-use counter the pages were accounted to. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* keep only a bounded number of spare pages in the global pool;
	 * beyond that, hand them back to the system */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* wake potential waiters in drbd_alloc_pages() */
	wake_up(&drbd_pp_wait);
}
319
320/*
321You need to hold the req_lock:
322 _drbd_wait_ee_list_empty()
323
324You must not have the req_lock:
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200325 drbd_free_peer_req()
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200326 drbd_alloc_peer_req()
Andreas Gruenbacher7721f562011-04-06 17:14:02 +0200327 drbd_free_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700328 drbd_ee_fix_bhs()
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200329 drbd_finish_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700330 drbd_clear_done_ee()
331 drbd_wait_ee_list_empty()
332*/
333
/* Allocate a peer request object and, if @data_size > 0, a page chain
 * large enough to hold the payload.  Returns NULL on allocation failure
 * or on an injected DRBD_FAULT_AL_EE fault. */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (data_size) {
		/* retry (i.e. block) only if the caller allowed waiting */
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->dw.device = device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver. It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
382
/* Free a peer request: its digest (if any), its page chain, and the
 * object itself.  @is_net selects which in-use page counter the pages
 * were accounted to (see drbd_free_pages()). */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
393
/* Detach all entries of @list under the req_lock, then free them
 * outside of it.  Returns the number of freed peer requests. */
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* net_ee entries are accounted in pp_in_use_by_net */
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, dw.w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}
411
/*
 * Run the completion callback of every peer request on done_ee and free
 * them; also reclaim finished net_ee entries.
 * Returns 0 on success, or the first callback error encountered.
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, dw.w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, dw.w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->dw.w.cb(&peer_req->dw.w, !!err);
		if (!err)
			err = err2;	/* remember only the first error */
		drbd_free_peer_req(device, peer_req);
	}
	/* wake waiters in _drbd_wait_ee_list_empty() */
	wake_up(&device->ee_wait);

	return err;
}
447
/* Wait until @head becomes empty.  Called with the req_lock held and
 * returns with it held; the lock is dropped around the actual sleep. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
463
/* Locked wrapper around _drbd_wait_ee_list_empty() for callers that do
 * not already hold the req_lock. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
471
/* Receive from @sock into the kernel buffer @buf.
 * @flags == 0 means MSG_WAITALL | MSG_NOSIGNAL (block for the full @size).
 * Returns the number of bytes received, or a negative error code. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	/* sock_recvmsg() checks buffers against the current address limit;
	 * lift it temporarily so our kernel buffer is accepted. */
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}
493
/* Receive @size bytes on the data socket, logging unexpected errors
 * and driving the connection to C_BROKEN_PIPE on a short result —
 * except when we sent the disconnect ourselves and the state machine
 * leaves C_WF_REPORT_PARAMS within ping_timeo. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		/* rv == 0: the peer shut the socket down */
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;

			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			/* expected shutdown: skip the broken-pipe handling */
			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
526
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200527static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100528{
529 int err;
530
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200531 err = drbd_recv(connection, buf, size);
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100532 if (err != size) {
533 if (err >= 0)
534 err = -EIO;
535 } else
536 err = 0;
537 return err;
538}
539
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200540static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100541{
542 int err;
543
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200544 err = drbd_recv_all(connection, buf, size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100545 if (err && !signal_pending(current))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200546 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100547 return err;
548}
549
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200550/* quoting tcp(7):
551 * On individual connections, the socket buffer size must be set prior to the
552 * listen(2) or connect(2) calls in order to have it take effect.
553 * This is our wrapper to do so.
554 */
555static void drbd_setbufsize(struct socket *sock, unsigned int snd,
556 unsigned int rcv)
557{
558 /* open coded SO_SNDBUF, SO_RCVBUF */
559 if (snd) {
560 sock->sk->sk_sndbuf = snd;
561 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
562 }
563 if (rcv) {
564 sock->sk->sk_rcvbuf = rcv;
565 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
566 }
567}
568
/* Actively establish a TCP connection to the configured peer.
 * Returns the connected socket, or NULL on failure.  Failures other
 * than "peer not (yet) reachable / interrupted" force the connection
 * state to C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* snapshot the net_conf values we need under RCU */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	/* sizeof(src_in6) is used as the bound here; src_in6 and peer_in6
	 * are the same type, so the bound is correct for peer_in6 too */
	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
		/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
		/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
656
/* Context shared between prepare_listen_socket() and the listen socket's
 * sk_state_change callback: drbd_incoming_connection() uses it to signal
 * drbd_wait_for_connect() that a peer connection attempt has arrived. */
struct accept_wait_data {
	struct drbd_connection *connection;	/* connection being established */
	struct socket *s_listen;		/* the listening socket */
	struct completion door_bell;		/* completed on an incoming connection */
	void (*original_sk_state_change)(struct sock *sk); /* saved callback, chained to and later restored */

};
664
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200665static void drbd_incoming_connection(struct sock *sk)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700666{
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200667 struct accept_wait_data *ad = sk->sk_user_data;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200668 void (*state_change)(struct sock *sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200669
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200670 state_change = ad->original_sk_state_change;
671 if (sk->sk_state == TCP_ESTABLISHED)
672 complete(&ad->door_bell);
673 state_change(sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200674}
675
/* Create, configure, bind and start listening on the socket used to accept
 * the peer's connection attempts.  On success, store the listen socket and
 * the saved sk_state_change callback in @ad for drbd_wait_for_connect()
 * and return 0.  On failure, release the socket and return -EIO; for
 * non-transient errors also log and force C_DISCONNECTING. */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;	/* large enough for AF_INET and AF_INET6 */
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;	/* names the failing step for the error message */

	/* Snapshot the tunables under RCU; net_conf may go away if the
	 * connection is being torn down concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Hook our state-change callback under sk_callback_lock so an
	 * incoming connection completes ad->door_bell; the original
	 * callback is saved so it can be chained and later restored. */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* transient errors (interrupted, try-again) are retried silently */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
738
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200739static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
740{
741 write_lock_bh(&sk->sk_callback_lock);
742 sk->sk_state_change = ad->original_sk_state_change;
743 sk->sk_user_data = NULL;
744 write_unlock_bh(&sk->sk_callback_lock);
745}
746
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200747static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200748{
749 int timeo, connect_int, err = 0;
750 struct socket *s_estab = NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200751 struct net_conf *nc;
752
753 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200754 nc = rcu_dereference(connection->net_conf);
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200755 if (!nc) {
756 rcu_read_unlock();
757 return NULL;
758 }
759 connect_int = nc->connect_int;
760 rcu_read_unlock();
761
762 timeo = connect_int * HZ;
Akinobu Mita38b682b22013-04-29 16:21:31 -0700763 /* 28.5% random jitter */
764 timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200765
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200766 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
767 if (err <= 0)
768 return NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200769
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200770 err = kernel_accept(ad->s_listen, &s_estab, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700771 if (err < 0) {
772 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200773 drbd_err(connection, "accept failed, err = %d\n", err);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200774 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700775 }
776 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700777
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200778 if (s_estab)
779 unregister_state_change(s_estab->sk, ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700780
781 return s_estab;
782}
783
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200784static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700785
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200786static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200787 enum drbd_packet cmd)
788{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200789 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200790 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200791 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700792}
793
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200794static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700795{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200796 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200797 struct packet_info pi;
798 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700799
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200800 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200801 if (err != header_size) {
802 if (err >= 0)
803 err = -EIO;
804 return err;
805 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200806 err = decode_header(connection, connection->data.rbuf, &pi);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200807 if (err)
808 return err;
809 return pi.cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700810}
811
812/**
813 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700814 * @sock: pointer to the pointer to the socket.
815 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100816static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700817{
818 int rr;
819 char tb[4];
820
821 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100822 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700823
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100824 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700825
826 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100827 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700828 } else {
829 sock_release(*sock);
830 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100831 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700832 }
833}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100834/* Gets called if a connection is established, or if a new minor gets created
835 in a connection */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200836int drbd_connected(struct drbd_peer_device *peer_device)
Philipp Reisner907599e2011-02-08 11:25:37 +0100837{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200838 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100839 int err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100840
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200841 atomic_set(&device->packet_seq, 0);
842 device->peer_seq = 0;
Philipp Reisner907599e2011-02-08 11:25:37 +0100843
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200844 device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
845 &peer_device->connection->cstate_mutex :
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200846 &device->own_state_mutex;
Philipp Reisner8410da8f02011-02-11 20:11:10 +0100847
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200848 err = drbd_send_sync_param(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100849 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200850 err = drbd_send_sizes(peer_device, 0, 0);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100851 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200852 err = drbd_send_uuids(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100853 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200854 err = drbd_send_current_state(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200855 clear_bit(USE_DEGR_WFC_T, &device->flags);
856 clear_bit(RESIZE_PENDING, &device->flags);
857 atomic_set(&device->ap_in_flight, 0);
858 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100859 return err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100860}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700861
/*
 * Establish both the data and the meta socket to the peer and perform the
 * feature/authentication/protocol handshake.
 *
 * return values:
 *	1 yes, we have a valid connection
 *	0 oops, did not work out, please try again
 *     -1 peer talks different language,
 *		no point in trying again, please go standalone.
 *     -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;		/* data resp. meta channel */
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	/* Both peers connect actively and accept passively at the same time;
	 * loop until we hold two healthy sockets (data + meta), resolving
	 * crossed connection attempts along the way. */
	do {
		struct socket *s;

		/* Active side: try to connect out to the peer. */
		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		/* Got both sockets?  Give the peer a moment, then re-check
		 * that neither connection died in the meantime. */
		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		/* Passive side: accept a connection the peer initiated. */
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected simultaneously:
					 * keep the accepted one */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* break symmetry between the two peers */
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	/* Connected on both channels; the listen socket is no longer needed. */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	/* data traffic is bulk, meta traffic (pings, acks) is latency sensitive */
	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	/* Negotiate the protocol version (P_CONNECTION_FEATURES exchange). */
	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	/* Optional shared-secret challenge/response authentication. */
	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	set_bit(STATE_SENT, &connection->flags);

	/* Per-volume handshake.  The RCU read lock is dropped while working
	 * on a device; the kref keeps the device alive across that window. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		/* Prevent a race between resync-handshake and
		 * being promoted to Primary.
		 *
		 * Grab and release the state mutex, so we know that any current
		 * drbd_set_role() is finished, and any incoming drbd_set_role
		 * will see the STATE_SENT flag, and wait for it to be cleared.
		 */
		mutex_lock(device->state_mutex);
		mutex_unlock(device->state_mutex);

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1097
/* Parse a received packet header into @pi.  Three on-the-wire layouts
 * exist, distinguished by the negotiated header size plus the magic value:
 * protocol 100 (p_header100, carries a volume number), the "big" header
 * (p_header95, 32-bit length) and the original header (p_header80, 16-bit
 * length).  Returns 0 on success, -EINVAL on a malformed or unrecognized
 * header. */
static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;	/* pre-100 headers carry no volume number */
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	/* the payload follows the header in the same receive buffer */
	pi->data = header + header_size;
	return 0;
}
1133
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001134static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001135{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001136 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001137 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001138
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001139 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001140 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001141 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001142
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001143 err = decode_header(connection, buffer, pi);
1144 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001145
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001146 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001147}
1148
/* Flush the backing devices of all volumes of @connection, if the current
 * write-ordering policy requires explicit disk flushes.  A failed flush
 * demotes the write ordering to WO_drain_io and stops iterating, so we do
 * not keep issuing flushes that fail. */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			/* skip devices without an attached local disk */
			if (!get_ldev(device))
				continue;
			/* blkdev_issue_flush() may sleep: pin the device with
			 * a kref and drop the RCU read lock meanwhile */
			kref_get(&device->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
1184
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection: DRBD connection the epoch belongs to.
 * @epoch: Epoch object.
 * @ev: Epoch event.
 *
 * Walks the epoch list starting at @epoch under connection->epoch_lock.
 * An epoch is "finished" once it has a nonzero size, no active requests,
 * and either has received its barrier number or is being cleaned up
 * (EV_CLEANUP).  Finished epochs that are not the current one are freed
 * (FE_DESTROYED); the current epoch is reset for reuse (FE_RECYCLED).
 * Returns FE_STILL_LIVE when nothing was finished.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* Dispatch on the event with the cleanup flag masked off. */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do */
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drbd_send_b_ack() may sleep; drop the spinlock around it. */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* An older, finished epoch: unlink and free it,
				 * then continue the walk with its successor. */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* The current epoch is recycled in place rather than freed. */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1260
/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @connection: DRBD connection.
 * @wo: Write ordering method to try.
 *
 * The effective method is the minimum of the currently configured one and
 * @wo (ordering can only be relaxed, never tightened here).  It is further
 * lowered if any attached disk's configuration disables flushes or drains.
 * The result is stored in connection->write_ordering and logged when it
 * changed (or when flushing remains in effect).
 */
void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
{
	struct disk_conf *dc;
	struct drbd_peer_device *peer_device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = connection->write_ordering;
	/* Never upgrade: take the weaker of current and requested method. */
	wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* D_ATTACHING is enough: disk_conf already exists then. */
		if (!get_ldev_if_state(device, D_ATTACHING))
			continue;
		dc = rcu_dereference(device->ldev->disk_conf);

		/* Per-disk configuration may forbid flushes and/or drains. */
		if (wo == WO_bdev_flush && !dc->disk_flushes)
			wo = WO_drain_io;
		if (wo == WO_drain_io && !dc->disk_drain)
			wo = WO_none;
		put_ldev(device);
	}
	rcu_read_unlock();
	connection->write_ordering = wo;
	if (pwo != connection->write_ordering || wo == WO_bdev_flush)
		drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
}
1299
/**
 * drbd_submit_peer_request() - Submit a peer request to the local backing device.
 * @device: DRBD device.
 * @peer_req: peer request
 * @rw: flag field, see bio->bi_rw
 * @fault_type: fault-injection class passed to drbd_generic_make_request()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked (via bi_next) chain of prepared bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* bytes still to be mapped into bios */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* Prepend to the chain; submission order below is reverse of build order. */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* Current bio is full; start another one for the rest. */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	/* All pages and all bytes must have been consumed. */
	D_ASSERT(device, page == NULL);
	D_ASSERT(device, ds == 0);

	/* Completion handler decrements this; last bio completes the request. */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* Nothing was submitted yet; release every prepared bio. */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1396
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001397static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001398 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001399{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001400 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001401
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001402 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001403 drbd_clear_interval(i);
1404
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001405 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001406 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001407 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001408}
1409
/*
 * conn_wait_active_ee_empty() - Wait until no volume of @connection has
 * active epoch entries left.
 *
 * Iterates the peer_devices idr under RCU; each device is pinned with a
 * kref so the RCU read lock can be dropped around the sleeping
 * drbd_wait_ee_list_empty() call, and re-acquired before continuing the
 * iteration.
 */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* Pin the device; we are about to sleep outside the RCU section. */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
1427
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001428static struct drbd_peer_device *
1429conn_peer_device(struct drbd_connection *connection, int volume_number)
1430{
1431 return idr_find(&connection->peer_devices, volume_number);
1432}
1433
/*
 * receive_Barrier() - Handle an incoming P_BARRIER packet.
 *
 * Records the peer's barrier number on the current epoch and tries to
 * finish it.  Depending on the write ordering method, either a fresh epoch
 * is started immediately (WO_none), or all active epoch entries are
 * drained and the backing devices flushed first, so the barrier ack is
 * only sent once the barriered writes are stable.
 * Returns 0 on success, -EIO on an invalid write ordering state.
 */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* Drain outstanding writes and flush disks before acking the barrier. */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		/* Only allocate a new epoch if the current one is non-empty;
		 * an empty one can simply be reused. */
		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		/* Chain the new epoch in and make it current. */
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1500
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 *
 * Receives @data_size bytes (possibly preceded by an integrity digest)
 * from the connection's socket into a freshly allocated peer request at
 * @sector.  Validates alignment, size, and that the write fits within the
 * local disk's capacity; verifies the digest when integrity checking is
 * configured.  Returns the peer request, or NULL on any error (the peer
 * request is freed in that case).
 */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;

	/* When data integrity checking is enabled, a digest precedes the
	 * payload on the wire; receive it and shrink data_size accordingly. */
	dgs = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return NULL;
		data_size -= dgs;
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* Zero-sized requests (e.g. trim-like) carry no payload to receive. */
	if (!data_size)
		return peer_req;

	/* Receive the payload page by page into the peer request's page chain. */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (dgs) {
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size>>9;
	return peer_req;
}
1584
/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 *
 * Reuses a single scratch page for the whole block: each chunk of up to
 * PAGE_SIZE bytes is received into the same page and thrown away.
 * Returns 0 on success, or the error from drbd_recv_all_warn().
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	/* NOTE(review): drbd_alloc_pages() is called with retry semantics and
	 * the result is used unchecked below; presumably it only fails on a
	 * pending signal — confirm it cannot return NULL here. */
	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}
1612
/*
 * recv_dless_read() - Receive a "diskless read" reply directly into the
 * pages of the original request's master bio.
 *
 * Optionally receives and verifies an integrity digest first.  The payload
 * is copied segment by segment into the bio that originated the read.
 * Returns 0 on success, a negative receive error, or -EINVAL on digest
 * mismatch.
 */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int dgs, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	/* An integrity digest, if configured, precedes the payload. */
	dgs = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return err;
		data_size -= dgs;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* Fill each bio segment straight from the socket. */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (dgs) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	/* The bio must have consumed exactly the advertised payload. */
	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
1660
/*
 * e_end_resync_block() is called in asender context via
 * drbd_finish_peer_reqs().
 *
 * Completion handler for a resync write: on success mark the range in
 * sync and send P_RS_WRITE_ACK; on I/O error record the failed range and
 * send P_NEG_ACK.  Drops the unacked count taken in recv_resync_read().
 * Returns the result of drbd_send_ack().
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_device_work *dw = device_work(w);
	struct drbd_peer_request *peer_req =
		container_of(dw, struct drbd_peer_request, dw);
	struct drbd_device *device = dw->device;
	sector_t sector = peer_req->i.sector;
	int err;

	/* Resync requests are never inserted into the write_requests tree. */
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(first_peer_device(device), P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(first_peer_device(device), P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}
1689
/*
 * recv_resync_read() - Receive a resync data block and submit it to the
 * local disk.
 *
 * Reads the block from the socket into a peer request, queues it on the
 * sync_ee list with e_end_resync_block() as completion callback, and
 * submits it as a WRITE.  On any failure the peer request is unwound and
 * -EIO is returned; in all cases the caller's ldev reference is released
 * (__releases(local)) — on success implicitly via request completion.
 */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    int data_size) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->dw.w.cb = e_end_resync_block;

	spin_lock_irq(&device->resource->req_lock);
	list_add(&peer_req->dw.w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* Account the incoming resync sectors for resync rate control. */
	atomic_add(data_size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->dw.w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
1727
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001728static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001729find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001730 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001731{
1732 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001733
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001734 /* Request object according to our peer */
1735 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001736 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001737 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001738 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001739 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001740 (unsigned long)id, (unsigned long long)sector);
1741 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001742 return NULL;
1743}
1744
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001745static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001746{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001747 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001748 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001749 struct drbd_request *req;
1750 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001751 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001752 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001753
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001754 peer_device = conn_peer_device(connection, pi->vnr);
1755 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001756 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001757 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758
1759 sector = be64_to_cpu(p->sector);
1760
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001761 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001762 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001763 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001764 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001765 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001766
Bart Van Assche24c48302011-05-21 18:32:29 +02001767 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001768 * special casing it there for the various failure cases.
1769 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001770 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001771 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001772 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001773 /* else: nothing. handled from drbd_disconnect...
1774 * I don't think we may complete this just yet
1775 * in case we are "on-disconnect: freeze" */
1776
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001777 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778}
1779
/* A resync data block arrived, in reply to one of our resync requests
 * (block_id is always ID_SYNCER for those).  Submit it to the local
 * disk; without a local disk, drain it from the socket and nack it. */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi->size);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		/* Still consume the payload so the data stream stays in
		 * sync with the packet headers, then tell the peer. */
		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	/* account the received sectors (rs_sect_in counter) */
	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}
1814
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001815static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001816 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001817{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001818 struct drbd_interval *i;
1819 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001821 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001822 if (!i->local)
1823 continue;
1824 req = container_of(i, struct drbd_request, i);
1825 if (req->rq_state & RQ_LOCAL_PENDING ||
1826 !(req->rq_state & RQ_POSTPONED))
1827 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001828 /* as it is RQ_POSTPONED, this will cause it to
1829 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001830 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001831 }
1832}
1833
/*
 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
 *
 * Completion callback for a mirrored write (peer request): send the
 * (negative) ack if the protocol requires one, remove the request from
 * the conflict-detection interval tree, and put our epoch reference.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw = device_work(w);
	struct drbd_peer_request *peer_req =
		container_of(dw, struct drbd_peer_request, dw);
	struct drbd_device *device = dw->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* In a sync state, a write flagged EE_MAY_SET_IN_SYNC
			 * is acked with P_RS_WRITE_ACK and the range marked
			 * in sync locally as well. */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(first_peer_device(device), pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			/* the local write failed: negatively ack it */
			err = drbd_send_ack(first_peer_device(device), P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(device);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
1878
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001879static int e_send_ack(struct drbd_device_work *dw, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001880{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001881 struct drbd_device *device = dw->device;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001882 struct drbd_peer_request *peer_req =
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001883 container_of(dw, struct drbd_peer_request, dw);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001884 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001885
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001886 err = drbd_send_ack(first_peer_device(device), ack, peer_req);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001887 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001889 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001890}
1891
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001892static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001893{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001894 return e_send_ack(device_work(w), P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001895}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001896
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001897static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001898{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001899 struct drbd_device_work *dw = device_work(w);
1900 struct drbd_connection *connection = first_peer_device(dw->device)->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001901
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001902 return e_send_ack(dw, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001903 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001904}
1905
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001906static bool seq_greater(u32 a, u32 b)
1907{
1908 /*
1909 * We assume 32-bit wrap-around here.
1910 * For 24-bit wrap-around, we would have to shift:
1911 * a <<= 8; b <<= 8;
1912 */
1913 return (s32)a - (s32)b > 0;
1914}
1915
1916static u32 seq_max(u32 a, u32 b)
1917{
1918 return seq_greater(a, b) ? a : b;
1919}
1920
/* Record the newest sequence number seen from the peer.  Wakes
 * waiters (wait_for_and_update_peer_seq()) only when this packet's
 * sequence number actually became the new maximum. */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	/* peer_seq is only tracked when write conflicts must be resolved */
	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
1936
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001937static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1938{
1939 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1940}
1941
1942/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001943static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001944{
1945 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001946 bool rv = 0;
1947
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001948 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001949 list_for_each_entry(rs_req, &device->sync_ee, dw.w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001950 if (overlaps(peer_req->i.sector, peer_req->i.size,
1951 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001952 rv = 1;
1953 break;
1954 }
1955 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001956 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001957
1958 return rv;
1959}
1960
Philipp Reisnerb411b362009-09-25 16:07:19 -07001961/* Called from receive_Data.
1962 * Synchronize packets on sock with packets on msock.
1963 *
1964 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1965 * packet traveling on msock, they are still processed in the order they have
1966 * been sent.
1967 *
1968 * Note: we don't care for Ack packets overtaking P_DATA packets.
1969 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001970 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07001971 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001972 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07001973 * ourselves. Correctly handles 32bit wrap around.
1974 *
1975 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1976 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1977 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1978 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1979 *
1980 * returns 0 if we may process the packet,
1981 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Sequence numbers only matter when write conflicts with the
	 * peer have to be resolved. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* Are we the logically next packet (or an older one)?
		 * Then record the newest sequence number and proceed. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		/* net_conf is RCU-protected; re-read it on every iteration */
		rcu_read_lock();
		tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		/* sleep until update_peer_seq() wakes us, or timeout */
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2029
Lars Ellenberg688593c2010-11-17 22:25:03 +01002030/* see also bio_flags_to_wire()
2031 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2032 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002033static unsigned long wire_flags_to_bio(struct drbd_device *device, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002034{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002035 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2036 (dpf & DP_FUA ? REQ_FUA : 0) |
2037 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2038 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002039}
2040
/* Negatively complete (NEG_ACKED) every postponed local write request
 * overlapping [sector, sector + size).  Called with
 * device->resource->req_lock held (it is dropped around
 * complete_master_bio() and re-acquired, which is why the overlap walk
 * restarts from scratch via "goto repeat" after each completion). */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		/* skip intervals that belong to peer requests */
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
2065
/*
 * Resolve conflicts between an incoming peer write @peer_req and any
 * overlapping requests in device->write_requests.
 *
 * Called under device->resource->req_lock (may block via
 * drbd_wait_misc() -- NOTE(review): which presumably drops and
 * re-acquires the lock; confirm against its definition).
 *
 * Returns 0 when the peer request may be submitted, -ENOENT when it
 * was superseded or scheduled for retry (an ack work item has been
 * queued on done_ee; the caller must not submit it), or another
 * negative error on failure.  On error the interval is removed again.
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* queue the appropriate ack for the asender thread */
			inc_unacked(device);
			peer_req->dw.w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->dw.w.list, &device->done_ee);
			wake_asender(first_peer_device(device)->connection);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					/* wait was interrupted: force a
					 * disconnect and fail the postponed
					 * requests we cannot resolve anymore */
					_conn_request_state(first_peer_device(device)->connection,
							    NS(conn, C_TIMEOUT),
							    CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
2174
Philipp Reisnerb411b362009-09-25 16:07:19 -07002175/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002176static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002177{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002178 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002179 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002180 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002181 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002182 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002183 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002184 int rw = WRITE;
2185 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002186 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002187
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002188 peer_device = conn_peer_device(connection, pi->vnr);
2189 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002190 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002191 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002192
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002193 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002194 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002195
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002196 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2197 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002198 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002199 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002200 if (!err)
2201 err = err2;
2202 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002203 }
2204
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002205 /*
2206 * Corresponding put_ldev done either below (on various errors), or in
2207 * drbd_peer_request_endio, if we successfully submit the data at the
2208 * end of this function.
2209 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002210
2211 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002212 peer_req = read_in_block(peer_device, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002213 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002214 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002215 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002216 }
2217
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02002218 peer_req->dw.w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002219
Lars Ellenberg688593c2010-11-17 22:25:03 +01002220 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002221 rw |= wire_flags_to_bio(device, dp_flags);
Lars Ellenberg81a35372012-07-30 09:00:54 +02002222 if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002223 D_ASSERT(device, peer_req->i.size == 0);
2224 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002225 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002226
2227 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002228 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002229
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002230 spin_lock(&connection->epoch_lock);
2231 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002232 atomic_inc(&peer_req->epoch->epoch_size);
2233 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002234 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002235
Philipp Reisner302bdea2011-04-21 11:36:49 +02002236 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002237 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002238 rcu_read_unlock();
2239 if (tp) {
2240 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002241 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002242 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002243 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002244 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002245 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002246 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002247 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002248 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002249 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002250 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002251 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002252 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002253 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002254 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002255 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002256 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002257 }
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02002258 list_add(&peer_req->dw.w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002259 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002260
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002261 if (device->state.conn == C_SYNC_TARGET)
2262 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002263
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002264 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002265 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002266 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002267 case DRBD_PROT_C:
2268 dp_flags |= DP_SEND_WRITE_ACK;
2269 break;
2270 case DRBD_PROT_B:
2271 dp_flags |= DP_SEND_RECEIVE_ACK;
2272 break;
2273 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002274 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002275 }
2276
2277 if (dp_flags & DP_SEND_WRITE_ACK) {
2278 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002279 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002280 /* corresponding dec_unacked() in e_end_block()
2281 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002282 }
2283
2284 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002285 /* I really don't like it that the receiver thread
2286 * sends on the msock, but anyways */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002287 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288 }
2289
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002290 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002291 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002292 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002293 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2294 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002295 drbd_al_begin_io(device, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002296 }
2297
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002298 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002299 if (!err)
2300 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002301
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002302 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002303 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002304 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02002305 list_del(&peer_req->dw.w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002306 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002307 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002308 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002309 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002310
Philipp Reisnerb411b362009-09-25 16:07:19 -07002311out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002312 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002313 put_ldev(device);
2314 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002315 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002316}
2317
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 *
 * Returns 1 if the resync request for @sector should be throttled
 * (caller sleeps for a while), 0 otherwise.
 */
int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	struct lc_element *tmp;
	int curr_events;
	int throttle = 0;
	unsigned int c_min_rate;

	/* disk_conf is RCU-protected; snapshot the configured minimum rate. */
	rcu_read_lock();
	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
	rcu_read_unlock();

	/* feature disabled? */
	if (c_min_rate == 0)
		return 0;

	/* The resync LRU is protected by al_lock.  If application IO is
	 * already waiting for this resync extent (BME_PRIORITY), throttling
	 * would only delay it further, so don't. */
	spin_lock_irq(&device->al_lock);
	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
			spin_unlock_irq(&device->al_lock);
			return 0;
		}
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&device->al_lock);

	/* Total sector activity on the backing device (reads + writes),
	 * minus what we caused ourselves via resync (rs_sect_ev);
	 * the remainder is "foreign" (application) activity. */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&device->rs_sect_ev);

	/* Only re-evaluate when we see "significant" (> 64 sectors)
	 * unaccounted activity since the last check. */
	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		device->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		/* During online verify, progress is tracked in ov_left rather
		 * than in the bitmap weight. */
		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
			rs_left = device->ov_left;
		else
			rs_left = drbd_bm_total_weight(device) - device->rs_failed;

		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;	/* avoid division by zero below */
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);	/* recent sync rate in KB/s */

		if (dbdt > c_min_rate)
			throttle = 1;
	}
	return throttle;
}
2388
2389
/* Handle a read-type request from the peer: P_DATA_REQUEST (application
 * read), P_RS_DATA_REQUEST (resync read), P_CSUM_RS_REQUEST (checksum based
 * resync), P_OV_REQUEST / P_OV_REPLY (online verify).
 *
 * Returns 0 on success (including the "no local data" negative-ack path);
 * a negative error code tells the caller to tear down the connection.
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* Sanity-check the request: positive, 512-byte aligned, bounded size,
	 * and within the device capacity. */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	/* No usable local data: send the matching negative ack and drain the
	 * payload so the stream stays in sync. */
	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY, p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	/* Select the completion callback and fault-injection type per request
	 * kind; some cases jump straight to the submit labels below. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->dw.w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->dw.w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		/* Both carry a digest as payload; read it into a digest_info
		 * allocated in one chunk (struct + digest bytes). */
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;	/* freed with the peer_req */

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->dw.w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->dw.w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First OV request with protocol >= 90 starts a new verify
		 * run: initialize progress bookkeeping from @sector. */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->dw.w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	/* account resync sectors we are about to cause on the lower device */
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	inc_unacked(device);	/* corresponding dec_unacked() in the completion callbacks */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->dw.w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->dw.w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
2579
/**
 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
 *
 * Returns 1 or -1 to pick a winner (presumably 1 = keep local data,
 * -1 = take peer's data — TODO confirm against callers), or -100 when the
 * configured policy yields no decision (disconnect / configuration error).
 *
 * NOTE: the ASB_DISCARD_YOUNGER_PRI -> ASB_DISCARD_OLDER_PRI ->
 * ASB_DISCARD_ZERO_CHG -> ASB_DISCARD_LEAST_CHG case fallthroughs below
 * are intentional: each strategy falls back to the next one when it cannot
 * decide.
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* low bit of the bitmap UUID: "was primary" indicator on each side */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* "changed blocks" counts used by the least-changes strategies */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	/* net_conf is RCU-protected; snapshot the configured policy. */
	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* these policies only make sense with at least one primary */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
			  "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* neither side changed anything: break the tie via the
			 * RESOLVE_CONFLICTS flag so both nodes agree */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* zero-chg alone could not decide; only fall further if a
		 * younger/older-pri strategy brought us here */
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* fall through */
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
2660
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002661/**
2662 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2663 */
2664static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002665{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002666 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002667 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002668 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002669
Philipp Reisner44ed1672011-04-19 17:10:19 +02002670 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002671 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002672 rcu_read_unlock();
2673 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002674 case ASB_DISCARD_YOUNGER_PRI:
2675 case ASB_DISCARD_OLDER_PRI:
2676 case ASB_DISCARD_LEAST_CHG:
2677 case ASB_DISCARD_LOCAL:
2678 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002679 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002680 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002681 break;
2682 case ASB_DISCONNECT:
2683 break;
2684 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002685 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002686 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002687 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002688 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002689 rv = hg;
2690 break;
2691 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002692 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002693 break;
2694 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002695 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002696 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002697 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002698 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002699 enum drbd_state_rv rv2;
2700
Philipp Reisnerb411b362009-09-25 16:07:19 -07002701 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2702 * we might be here in C_WF_REPORT_PARAMS which is transient.
2703 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002704 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002705 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002706 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002707 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002708 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002709 rv = hg;
2710 }
2711 } else
2712 rv = hg;
2713 }
2714
2715 return rv;
2716}
2717
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002718/**
2719 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2720 */
2721static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002722{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002723 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002724 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002725 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002726
Philipp Reisner44ed1672011-04-19 17:10:19 +02002727 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002728 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002729 rcu_read_unlock();
2730 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002731 case ASB_DISCARD_YOUNGER_PRI:
2732 case ASB_DISCARD_OLDER_PRI:
2733 case ASB_DISCARD_LEAST_CHG:
2734 case ASB_DISCARD_LOCAL:
2735 case ASB_DISCARD_REMOTE:
2736 case ASB_CONSENSUS:
2737 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002738 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002739 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002740 break;
2741 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002742 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002743 break;
2744 case ASB_DISCONNECT:
2745 break;
2746 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002747 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002748 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002749 enum drbd_state_rv rv2;
2750
Philipp Reisnerb411b362009-09-25 16:07:19 -07002751 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2752 * we might be here in C_WF_REPORT_PARAMS which is transient.
2753 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002754 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002755 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002756 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002757 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002758 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002759 rv = hg;
2760 }
2761 } else
2762 rv = hg;
2763 }
2764
2765 return rv;
2766}
2767
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002768static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002769 u64 bits, u64 flags)
2770{
2771 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002772 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002773 return;
2774 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002775 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776 text,
2777 (unsigned long long)uuid[UI_CURRENT],
2778 (unsigned long long)uuid[UI_BITMAP],
2779 (unsigned long long)uuid[UI_HISTORY_START],
2780 (unsigned long long)uuid[UI_HISTORY_END],
2781 (unsigned long long)bits,
2782 (unsigned long long)flags);
2783}
2784
/*
  100	after split brain try auto recover
    2	C_SYNC_SOURCE set BitMap
    1	C_SYNC_SOURCE use BitMap
    0	no Sync
   -1	C_SYNC_TARGET use BitMap
   -2	C_SYNC_TARGET set BitMap
 -100	after split brain, disconnect
-1000	unrelated data
-1091	requires proto 91
-1096	requires proto 96
 */
/* Compare our on-disk UUID set against the peer's (device->p_uuid) and
 * decide who has the newer data; see the legend above for the meaning of
 * the return value.  *rule_nr records which rule fired, for logging.
 * Called with a local disk reference held; the caller in
 * drbd_sync_handshake() also holds device->ldev->md.uuid_lock around
 * this, since several branches below rewrite UUID slots in place. */
static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	/* The lowest bit of a UUID is used as a flag elsewhere; mask it off
	 * for all comparisons. */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* Rule 10: both sides freshly created -- nothing to sync. */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* Rule 20: only we are fresh/blank -- full sync, we are target. */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* Rule 30: only the peer is fresh/blank -- full sync, we are source. */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* We still carry a bitmap UUID but the peer already cleared
		 * its own: we were sync source and missed the "resync
		 * finished" event. */
		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				/* Retire the stale bitmap UUID into history,
				 * mirroring what the finished-event handler
				 * would have done. */
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* Mirror image of the case above: the peer still carries a
		 * bitmap UUID while we already cleared ours -- we were sync
		 * target; correct the peer's (in-memory) UUIDs. */
		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* Both were primary at crash time; break the tie via
			 * the connection's RESOLVE_CONFLICTS flag. */
			dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* Rule 50: our current UUID matches the peer's bitmap UUID -- the
	 * peer started a resync against us; we are target. */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* Rule 60: our current UUID only appears in the peer's history --
	 * the peer moved on without us; full sync, we are target. */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* Rules 70/71/80: mirror image of rules 50/51/60 with the roles of
	 * self and peer swapped -- we are the newer side. */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* Rule 90: identical non-zero bitmap UUIDs on both sides -- split
	 * brain; the callers may attempt auto recovery. */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* Rule 100: any common ancestor in the two histories -- split brain
	 * without a recovery hint; disconnect. */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* No relation between the two UUID sets at all. */
	return -1000;
}
2978
/* drbd_sync_handshake() returns the new conn state on success, or
   C_MASK (-1) on failure.
 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002982static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
2983 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002984 enum drbd_disk_state peer_disk) __must_hold(local)
2985{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002986 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002987 enum drbd_conns rv = C_MASK;
2988 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002989 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002990 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002991
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002992 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002993 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002994 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002995
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002996 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002997
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002998 spin_lock_irq(&device->ldev->md.uuid_lock);
2999 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3000 drbd_uuid_dump(device, "peer", device->p_uuid,
3001 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003002
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003003 hg = drbd_uuid_compare(device, &rule_nr);
3004 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003005
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003006 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003007
3008 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003009 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003010 return C_MASK;
3011 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003012 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003013 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003014 return C_MASK;
3015 }
3016
3017 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3018 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3019 int f = (hg == -100) || abs(hg) == 2;
3020 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3021 if (f)
3022 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003023 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003024 hg > 0 ? "source" : "target");
3025 }
3026
Adam Gandelman3a11a482010-04-08 16:48:23 -07003027 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003028 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003029
Philipp Reisner44ed1672011-04-19 17:10:19 +02003030 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003031 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003032
3033 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003034 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003035 + (peer_role == R_PRIMARY);
3036 int forced = (hg == -100);
3037
3038 switch (pcount) {
3039 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003040 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003041 break;
3042 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003043 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003044 break;
3045 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003046 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003047 break;
3048 }
3049 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003050 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003051 "automatically solved. Sync from %s node\n",
3052 pcount, (hg < 0) ? "peer" : "this");
3053 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003054 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003055 " UUIDs where ambiguous.\n");
3056 hg = hg*2;
3057 }
3058 }
3059 }
3060
3061 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003062 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003063 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003064 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003065 hg = 1;
3066
3067 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003068 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003069 "Sync from %s node\n",
3070 (hg < 0) ? "peer" : "this");
3071 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003072 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003073 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003074 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003075
3076 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003077 /* FIXME this log message is not correct if we end up here
3078 * after an attempted attach on a diskless node.
3079 * We just refuse to attach -- well, we drop the "connection"
3080 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003081 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003082 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003083 return C_MASK;
3084 }
3085
3086 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003087 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003088 return C_MASK;
3089 }
3090
3091 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003092 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003093 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003094 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003095 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003096 /* fall through */
3097 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003098 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003099 return C_MASK;
3100 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003101 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003102 "assumption\n");
3103 }
3104 }
3105
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003106 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003107 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003108 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003109 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003110 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003111 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3112 abs(hg) >= 2 ? "full" : "bit-map based");
3113 return C_MASK;
3114 }
3115
Philipp Reisnerb411b362009-09-25 16:07:19 -07003116 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003117 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003118 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003119 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003120 return C_MASK;
3121 }
3122
3123 if (hg > 0) { /* become sync source. */
3124 rv = C_WF_BITMAP_S;
3125 } else if (hg < 0) { /* become sync target */
3126 rv = C_WF_BITMAP_T;
3127 } else {
3128 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003129 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003130 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003131 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003132 }
3133 }
3134
3135 return rv;
3136}
3137
Philipp Reisnerf179d762011-05-16 17:31:47 +02003138static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003139{
3140 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003141 if (peer == ASB_DISCARD_REMOTE)
3142 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003143
3144 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003145 if (peer == ASB_DISCARD_LOCAL)
3146 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003147
3148 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003149 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003150}
3151
/*
 * Handle a P_PROTOCOL (or P_PROTOCOL_UPDATE) packet.
 *
 * Verifies that the peer's wire protocol, after-split-brain policies,
 * two-primaries and discard-my-data settings are compatible with our
 * net_conf, takes over the peer's data integrity algorithm, and installs
 * an updated net_conf via RCU.
 *
 * Returns 0 on success, a negative error from the receive path, or -EIO
 * after requesting C_DISCONNECTING on incompatible settings or
 * allocation failure.
 */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* Decode the fixed part of the packet (on-wire values are big endian). */
	p_proto = be32_to_cpu(p->protocol);
	p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf = be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol 87 the integrity algorithm name follows as a
	 * variable-length trailer; receive it now. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		/* Ensure NUL termination regardless of what the peer sent. */
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* For the initial P_PROTOCOL (not a later P_PROTOCOL_UPDATE) the
	 * peer's settings must match our configuration exactly. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		/* The after-sb policies are expressed from the peer's point
		 * of view; convert before comparing. */
		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* discard-my-data may be set on at most one side. */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* Scratch buffers for received and locally recomputed digests. */
		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* Publish the updated net_conf: copy the old one, patch in the
	 * negotiated values, and swap it in under both locks. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Replace the previous integrity state with the freshly allocated one. */
	crypto_free_hash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* Wait for readers of the old net_conf before freeing it. */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	/* crypto_free_hash()/kfree() tolerate NULL, so partially initialized
	 * state can be torn down unconditionally. */
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3301
3302/* helper function
3303 * input: alg name, feature name
3304 * return: NULL (alg name was "")
3305 * ERR_PTR(error) if something goes wrong
3306 * or the crypto hash ptr, if it worked out ok. */
Rashika Kheriaf63e6312013-12-19 15:11:09 +05303307static
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003308struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309 const char *alg, const char *name)
3310{
3311 struct crypto_hash *tfm;
3312
3313 if (!alg[0])
3314 return NULL;
3315
3316 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3317 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003318 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003319 alg, name, PTR_ERR(tfm));
3320 return tfm;
3321 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003322 return tfm;
3323}
3324
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003325static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003326{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003327 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003328 int size = pi->size;
3329
3330 while (size) {
3331 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003332 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003333 if (s <= 0) {
3334 if (s < 0)
3335 return s;
3336 break;
3337 }
3338 size -= s;
3339 }
3340 if (size)
3341 return -EIO;
3342 return 0;
3343}
3344
/*
 * config_unknown_volume - device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet. It will warn and ignore these
 * commands. Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 *
 * Returns the result of draining the packet payload: 0 if fully consumed,
 * negative error otherwise (see ignore_remaining_packet()).
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* discard the payload so the stream stays aligned on packet boundaries */
	return ignore_remaining_packet(connection, pi);
}
3362
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003363static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003364{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003365 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003366 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003367 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003368 unsigned int header_size, data_size, exp_max_sz;
3369 struct crypto_hash *verify_tfm = NULL;
3370 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003371 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003372 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003373 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003374 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003375 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003376 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003377
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003378 peer_device = conn_peer_device(connection, pi->vnr);
3379 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003380 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003381 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003382
3383 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3384 : apv == 88 ? sizeof(struct p_rs_param)
3385 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003386 : apv <= 94 ? sizeof(struct p_rs_param_89)
3387 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003388
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003389 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003390 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003391 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003392 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003393 }
3394
3395 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003396 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003397 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003398 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003399 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003400 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003401 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003402 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003403 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003404 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003405 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003406 }
3407
3408 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003409 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003410 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3411
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003412 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003413 if (err)
3414 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003415
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003416 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003417 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003418 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003419 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3420 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003421 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003422 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003423 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003424 return -ENOMEM;
3425 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003426
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003427 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003428 *new_disk_conf = *old_disk_conf;
3429
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003430 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003431 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003432
3433 if (apv >= 88) {
3434 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003435 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003436 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003437 "peer wants %u, accepting only up to %u byte\n",
3438 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003439 err = -EIO;
3440 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003441 }
3442
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003443 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003444 if (err)
3445 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003446 /* we expect NUL terminated string */
3447 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003448 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003449 p->verify_alg[data_size-1] = 0;
3450
3451 } else /* apv >= 89 */ {
3452 /* we still expect NUL terminated strings */
3453 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003454 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3455 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003456 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3457 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3458 }
3459
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003460 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003461 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003462 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003463 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003464 goto disconnect;
3465 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003466 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003467 p->verify_alg, "verify-alg");
3468 if (IS_ERR(verify_tfm)) {
3469 verify_tfm = NULL;
3470 goto disconnect;
3471 }
3472 }
3473
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003474 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003475 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003476 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003477 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003478 goto disconnect;
3479 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003480 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003481 p->csums_alg, "csums-alg");
3482 if (IS_ERR(csums_tfm)) {
3483 csums_tfm = NULL;
3484 goto disconnect;
3485 }
3486 }
3487
Philipp Reisner813472c2011-05-03 16:47:02 +02003488 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003489 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3490 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3491 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3492 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003493
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003494 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003495 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003496 new_plan = fifo_alloc(fifo_size);
3497 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003498 drbd_err(device, "kmalloc of fifo_buffer failed");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003499 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003500 goto disconnect;
3501 }
3502 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003503 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003504
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003505 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003506 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3507 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003508 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003509 goto disconnect;
3510 }
3511
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003512 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003513
3514 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003515 strcpy(new_net_conf->verify_alg, p->verify_alg);
3516 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003517 crypto_free_hash(peer_device->connection->verify_tfm);
3518 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003519 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003520 }
3521 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003522 strcpy(new_net_conf->csums_alg, p->csums_alg);
3523 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003524 crypto_free_hash(peer_device->connection->csums_tfm);
3525 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003526 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003527 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003528 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003530 }
3531
Philipp Reisner813472c2011-05-03 16:47:02 +02003532 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003533 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3534 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003535 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003536
3537 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003538 old_plan = device->rs_plan_s;
3539 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003540 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003541
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003542 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003543 synchronize_rcu();
3544 if (new_net_conf)
3545 kfree(old_net_conf);
3546 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003547 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003548
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003549 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003550
Philipp Reisner813472c2011-05-03 16:47:02 +02003551reconnect:
3552 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003553 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003554 kfree(new_disk_conf);
3555 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003556 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003557 return -EIO;
3558
Philipp Reisnerb411b362009-09-25 16:07:19 -07003559disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003560 kfree(new_plan);
3561 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003562 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003563 kfree(new_disk_conf);
3564 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003565 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003566 /* just for completeness: actually not needed,
3567 * as this is not reached if csums_tfm was ok. */
3568 crypto_free_hash(csums_tfm);
3569 /* but free the verify_tfm again, if csums_tfm did not work out */
3570 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003571 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003572 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003573}
3574
Philipp Reisnerb411b362009-09-25 16:07:19 -07003575/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003576static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003577 const char *s, sector_t a, sector_t b)
3578{
3579 sector_t d;
3580 if (a == 0 || b == 0)
3581 return;
3582 d = (a > b) ? (a - b) : (b - a);
3583 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003584 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003585 (unsigned long long)a, (unsigned long long)b);
3586}
3587
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003588static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003589{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003590 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003591 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003592 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003593 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003594 sector_t p_size, p_usize, my_usize;
3595 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003596 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003597
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003598 peer_device = conn_peer_device(connection, pi->vnr);
3599 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003600 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003601 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003602
Philipp Reisnerb411b362009-09-25 16:07:19 -07003603 p_size = be64_to_cpu(p->d_size);
3604 p_usize = be64_to_cpu(p->u_size);
3605
Philipp Reisnerb411b362009-09-25 16:07:19 -07003606 /* just store the peer's disk size for now.
3607 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003608 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003609
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003610 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003611 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003612 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003613 rcu_read_unlock();
3614
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003615 warn_if_differ_considerably(device, "lower level device sizes",
3616 p_size, drbd_get_max_capacity(device->ldev));
3617 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003618 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003619
3620 /* if this is the first connect, or an otherwise expected
3621 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003622 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003623 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003624
3625 /* Never shrink a device with usable data during connect.
3626 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003627 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3628 drbd_get_capacity(device->this_bdev) &&
3629 device->state.disk >= D_OUTDATED &&
3630 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003631 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003632 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003633 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003634 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003635 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003636
3637 if (my_usize != p_usize) {
3638 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3639
3640 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3641 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003642 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003643 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003644 return -ENOMEM;
3645 }
3646
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003647 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003648 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003649 *new_disk_conf = *old_disk_conf;
3650 new_disk_conf->disk_size = p_usize;
3651
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003652 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003653 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003654 synchronize_rcu();
3655 kfree(old_disk_conf);
3656
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003657 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003658 (unsigned long)my_usize);
3659 }
3660
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003661 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003662 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003663
Philipp Reisnere89b5912010-03-24 17:11:33 +01003664 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003665 if (get_ldev(device)) {
3666 dd = drbd_determine_dev_size(device, ddsf, NULL);
3667 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003668 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003669 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003670 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003671 } else {
3672 /* I am diskless, need to accept the peer's size. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003673 drbd_set_my_capacity(device, p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003674 }
3675
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003676 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3677 drbd_reconsider_max_bio_size(device);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003678
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003679 if (get_ldev(device)) {
3680 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3681 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003682 ldsc = 1;
3683 }
3684
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003685 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003686 }
3687
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003688 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003689 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003690 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003691 /* we have different sizes, probably peer
3692 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003693 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003694 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003695 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3696 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3697 if (device->state.pdsk >= D_INCONSISTENT &&
3698 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003699 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003700 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003701 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003702 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003703 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003704 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003705 }
3706 }
3707
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003708 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003709}
3710
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003711static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003712{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003713 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003714 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003715 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003716 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003717 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003718
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003719 peer_device = conn_peer_device(connection, pi->vnr);
3720 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003721 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003722 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003723
Philipp Reisnerb411b362009-09-25 16:07:19 -07003724 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003725 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003726 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003727 return false;
3728 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003729
3730 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3731 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3732
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003733 kfree(device->p_uuid);
3734 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003735
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003736 if (device->state.conn < C_CONNECTED &&
3737 device->state.disk < D_INCONSISTENT &&
3738 device->state.role == R_PRIMARY &&
3739 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003740 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003741 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003742 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003743 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003744 }
3745
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003746 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003747 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003748 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003749 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003750 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003751 (p_uuid[UI_FLAGS] & 8);
3752 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003753 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003754 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003755 "clear_n_write from receive_uuids",
3756 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003757 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3758 _drbd_uuid_set(device, UI_BITMAP, 0);
3759 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003760 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003761 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003762 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003763 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003764 put_ldev(device);
3765 } else if (device->state.disk < D_INCONSISTENT &&
3766 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003767 /* I am a diskless primary, the peer just created a new current UUID
3768 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003769 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003770 }
3771
3772 /* Before we test for the disk state, we should wait until an eventually
3773 ongoing cluster wide state change is finished. That is important if
3774 we are primary and are detaching from our disk. We need to see the
3775 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003776 mutex_lock(device->state_mutex);
3777 mutex_unlock(device->state_mutex);
3778 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3779 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003780
3781 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003782 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003783
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003784 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003785}
3786
3787/**
3788 * convert_state() - Converts the peer's view of the cluster state to our point of view
3789 * @ps: The state as seen by the peer.
3790 */
3791static union drbd_state convert_state(union drbd_state ps)
3792{
3793 union drbd_state ms;
3794
3795 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003796 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003797 [C_CONNECTED] = C_CONNECTED,
3798
3799 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3800 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3801 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3802 [C_VERIFY_S] = C_VERIFY_T,
3803 [C_MASK] = C_MASK,
3804 };
3805
3806 ms.i = ps.i;
3807
3808 ms.conn = c_tab[ps.conn];
3809 ms.peer = ps.role;
3810 ms.role = ps.peer;
3811 ms.pdsk = ps.disk;
3812 ms.disk = ps.pdsk;
3813 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3814
3815 return ms;
3816}
3817
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003818static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003819{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003820 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003821 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003822 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003823 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003824 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003826 peer_device = conn_peer_device(connection, pi->vnr);
3827 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003828 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003829 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003830
Philipp Reisnerb411b362009-09-25 16:07:19 -07003831 mask.i = be32_to_cpu(p->mask);
3832 val.i = be32_to_cpu(p->val);
3833
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003834 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003835 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003836 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003837 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003838 }
3839
3840 mask = convert_state(mask);
3841 val = convert_state(val);
3842
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003843 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003844 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003845
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003846 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003847
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003848 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849}
3850
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003851static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003852{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003853 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003854 union drbd_state mask, val;
3855 enum drbd_state_rv rv;
3856
3857 mask.i = be32_to_cpu(p->mask);
3858 val.i = be32_to_cpu(p->val);
3859
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003860 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3861 mutex_is_locked(&connection->cstate_mutex)) {
3862 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003863 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003864 }
3865
3866 mask = convert_state(mask);
3867 val = convert_state(val);
3868
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003869 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3870 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003871
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003872 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003873}
3874
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003875static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003876{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003877 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003878 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003879 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003880 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003881 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003882 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003883 int rv;
3884
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003885 peer_device = conn_peer_device(connection, pi->vnr);
3886 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003887 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003888 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003889
Philipp Reisnerb411b362009-09-25 16:07:19 -07003890 peer_state.i = be32_to_cpu(p->state);
3891
3892 real_peer_disk = peer_state.disk;
3893 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003894 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003895 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003896 }
3897
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003898 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003899 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003900 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003901 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003902
Lars Ellenberg545752d2011-12-05 14:39:25 +01003903 /* If some other part of the code (asender thread, timeout)
3904 * already decided to close the connection again,
3905 * we must not "re-establish" it here. */
3906 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003907 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01003908
Lars Ellenberg40424e42011-09-26 15:24:56 +02003909 /* If this is the "end of sync" confirmation, usually the peer disk
3910 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3911 * set) resync started in PausedSyncT, or if the timing of pause-/
3912 * unpause-sync events has been "just right", the peer disk may
3913 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3914 */
3915 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3916 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003917 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3918 /* If we are (becoming) SyncSource, but peer is still in sync
3919 * preparation, ignore its uptodate-ness to avoid flapping, it
3920 * will change to inconsistent once the peer reaches active
3921 * syncing states.
3922 * It may have changed syncer-paused flags, however, so we
3923 * cannot ignore this completely. */
3924 if (peer_state.conn > C_CONNECTED &&
3925 peer_state.conn < C_SYNC_SOURCE)
3926 real_peer_disk = D_INCONSISTENT;
3927
3928 /* if peer_state changes to connected at the same time,
3929 * it explicitly notifies us that it finished resync.
3930 * Maybe we should finish it up, too? */
3931 else if (os.conn >= C_SYNC_SOURCE &&
3932 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003933 if (drbd_bm_total_weight(device) <= device->rs_failed)
3934 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003935 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003936 }
3937 }
3938
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003939 /* explicit verify finished notification, stop sector reached. */
3940 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3941 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003942 ov_out_of_sync_print(device);
3943 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003944 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003945 }
3946
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003947 /* peer says his disk is inconsistent, while we think it is uptodate,
3948 * and this happens while the peer still thinks we have a sync going on,
3949 * but we think we are already done with the sync.
3950 * We ignore this to avoid flapping pdsk.
3951 * This should not happen, if the peer is a recent version of drbd. */
3952 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3953 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3954 real_peer_disk = D_UP_TO_DATE;
3955
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003956 if (ns.conn == C_WF_REPORT_PARAMS)
3957 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003958
Philipp Reisner67531712010-10-27 12:21:30 +02003959 if (peer_state.conn == C_AHEAD)
3960 ns.conn = C_BEHIND;
3961
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003962 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3963 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003964 int cr; /* consider resync */
3965
3966 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003967 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003968 /* if we had an established connection
3969 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003970 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003971 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003972 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003973 /* if we have both been inconsistent, and the peer has been
3974 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003975 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003976 /* if we had been plain connected, and the admin requested to
3977 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003978 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003979 (peer_state.conn >= C_STARTING_SYNC_S &&
3980 peer_state.conn <= C_WF_BITMAP_T));
3981
3982 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003983 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003984
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003985 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003986 if (ns.conn == C_MASK) {
3987 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003988 if (device->state.disk == D_NEGOTIATING) {
3989 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003990 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003991 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003992 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003993 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003994 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003995 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003996 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003997 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003998 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003999 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004000 }
4001 }
4002 }
4003
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004004 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004005 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004006 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004007 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004008 ns.peer = peer_state.role;
4009 ns.pdsk = real_peer_disk;
4010 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004011 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004012 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004013 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004014 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4015 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004016 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004017 for temporal network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004018 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004019 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004020 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004021 drbd_uuid_new_current(device);
4022 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004023 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004024 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004025 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004026 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4027 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004028 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004029
4030 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004031 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004032 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004033 }
4034
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004035 if (os.conn > C_WF_REPORT_PARAMS) {
4036 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004037 peer_state.disk != D_NEGOTIATING ) {
4038 /* we want resync, peer has not yet decided to sync... */
4039 /* Nowadays only used when forcing a node into primary role and
4040 setting its disk to UpToDate with that */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004041 drbd_send_uuids(peer_device);
4042 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004043 }
4044 }
4045
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004046 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004047
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004048 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004049
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004050 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004051}
4052
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004053static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004054{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004055 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004056 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004057 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004058
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004059 peer_device = conn_peer_device(connection, pi->vnr);
4060 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004061 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004062 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004063
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004064 wait_event(device->misc_wait,
4065 device->state.conn == C_WF_SYNC_UUID ||
4066 device->state.conn == C_BEHIND ||
4067 device->state.conn < C_CONNECTED ||
4068 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004069
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004070 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004071
Philipp Reisnerb411b362009-09-25 16:07:19 -07004072 /* Here the _drbd_uuid_ functions are right, current should
4073 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004074 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4075 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4076 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004077
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004078 drbd_print_uuids(device, "updated sync uuid");
4079 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004080
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004081 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004082 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004083 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004084
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004085 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004086}
4087
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004088/**
4089 * receive_bitmap_plain
4090 *
4091 * Return 0 when done, 1 when another iteration is needed, and a negative error
4092 * code upon failure.
4093 */
4094static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004095receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004096 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004097{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004098 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004099 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004100 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004101 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004102 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004103 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004104
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004105 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004106 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004107 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004108 }
4109 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004110 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004111 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004112 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004113 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004114
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004115 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004116
4117 c->word_offset += num_words;
4118 c->bit_offset = c->word_offset * BITS_PER_LONG;
4119 if (c->bit_offset > c->bm_bits)
4120 c->bit_offset = c->bm_bits;
4121
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004122 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004123}
4124
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004125static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4126{
4127 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4128}
4129
4130static int dcbp_get_start(struct p_compressed_bm *p)
4131{
4132 return (p->encoding & 0x80) != 0;
4133}
4134
4135static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4136{
4137 return (p->encoding >> 4) & 0x7;
4138}
4139
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004140/**
4141 * recv_bm_rle_bits
4142 *
4143 * Return 0 when done, 1 when another iteration is needed, and a negative error
4144 * code upon failure.
4145 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		 struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;		/* 64-bit decode window into the bit stream */
	u64 rl;			/* run length of the current run */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* start bit of the current run */
	unsigned long e;			/* end bit of the current run */
	int toggle = dcbp_get_start(p);	/* whether the current run is of set bits */
	int have;		/* number of valid bits in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	/* Runs alternate between clear and set bits; only runs of set bits
	 * ("toggle" true) are applied to the bitmap. */
	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* run must not extend past the end of the bitmap */
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* the decoded code word must fit into what we had buffered */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window from the bit stream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* done only when the decoded runs exactly covered the bitmap */
	return (s != c->bm_bits);
}
4208
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004209/**
4210 * decode_bitmap_c
4211 *
4212 * Return 0 when done, 1 when another iteration is needed, and a negative error
4213 * code upon failure.
4214 */
4215static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004216decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004217 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004218 struct bm_xfer_ctx *c,
4219 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004220{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004221 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004222 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004223
4224 /* other variants had been implemented for evaluation,
4225 * but have been dropped as this one turned out to be "best"
4226 * during all our tests. */
4227
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004228 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4229 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004230 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004231}
4232
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004233void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004234 const char *direction, struct bm_xfer_ctx *c)
4235{
4236 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004237 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004238 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4239 unsigned int plain =
4240 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4241 c->bm_words * sizeof(unsigned long);
4242 unsigned int total = c->bytes[0] + c->bytes[1];
4243 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004244
4245 /* total can not be zero. but just in case: */
4246 if (total == 0)
4247 return;
4248
4249 /* don't report if not compressed */
4250 if (total >= plain)
4251 return;
4252
4253 /* total < plain. check for overflow, still */
4254 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4255 : (1000 * total / plain);
4256
4257 if (r > 1000)
4258 r = 1000;
4259
4260 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004261 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004262 "total %u; compression: %u.%u%%\n",
4263 direction,
4264 c->bytes[1], c->packets[1],
4265 c->bytes[0], c->packets[0],
4266 total, r/10, r % 10);
4267}
4268
/* Since we process the bitfield from lower to higher addresses, it does
   not matter whether we work on it in 32 bit or 64 bit chunks, as long as
   it is little endian. (Understand it as a byte stream, beginning with the
   lowest byte...) If we used big endian, we would need to process it from
   the highest address to the lowest, in order to be agnostic to the
   32 vs 64 bits issue.

   Returns 0 on success, and a negative error code otherwise. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004277static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004279 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004280 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004281 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004282 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004283
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004284 peer_device = conn_peer_device(connection, pi->vnr);
4285 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004286 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004287 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004289 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004290 /* you are supposed to send additional out-of-sync information
4291 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004292
Philipp Reisnerb411b362009-09-25 16:07:19 -07004293 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004294 .bm_bits = drbd_bm_bits(device),
4295 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296 };
4297
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004298 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004299 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004300 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004301 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004302 /* MAYBE: sanity check that we speak proto >= 90,
4303 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004304 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004305
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004306 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004307 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004308 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004309 goto out;
4310 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004311 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004312 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004313 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004314 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004315 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004316 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004317 if (err)
4318 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004319 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004320 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004321 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004322 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004323 goto out;
4324 }
4325
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004326 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004327 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004328
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004329 if (err <= 0) {
4330 if (err < 0)
4331 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004332 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004333 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004334 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004335 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004336 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004337 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004338
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004339 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004340
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004341 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004342 enum drbd_state_rv rv;
4343
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004344 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004345 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004346 goto out;
4347 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004348 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004349 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004350 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004351 /* admin may have requested C_DISCONNECTING,
4352 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004353 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004354 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004355 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004356 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004357
Philipp Reisnerb411b362009-09-25 16:07:19 -07004358 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004359 drbd_bm_unlock(device);
4360 if (!err && device->state.conn == C_WF_BITMAP_S)
4361 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004362 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004363}
4364
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004365static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004366{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004367 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004368 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004369
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004370 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004371}
4372
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004373static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004374{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004375 /* Make sure we've acked all the TCP data associated
4376 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004377 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004378
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004379 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004380}
4381
/*
 * Handler for P_OUT_OF_SYNC: the peer tells us a block range is out of sync;
 * mark it so in our local bitmap.
 *
 * Returns 0 on success, -EIO if the volume number in the packet does not
 * resolve to a known peer device.
 */
static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_desc *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* This packet is only expected in these connection states.  Any other
	 * state is logged as an assertion failure, but — deliberately — we
	 * fall through and mark the range out of sync anyway: recording too
	 * much as out-of-sync is safe, losing the information is not. */
	switch (device->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
		break;
	default:
		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
				drbd_conn_str(device->state.conn));
	}

	/* sector/blksize arrive in network byte order. */
	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return 0;
}
4407
/* One entry per data-socket packet type; see drbd_cmd_handler[] below. */
struct data_cmd {
	int expect_payload;	/* non-zero if data beyond pkt_size may follow */
	size_t pkt_size;	/* fixed sub-header size read before fn is called */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* handler */
};
4413
/*
 * Dispatch table for the receiver main loop (drbdd), indexed by packet
 * command number.  Entries with a NULL .fn (gaps in the designated
 * initializers) are rejected as unexpected packets.
 */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
};
4440
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004441static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004442{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004443 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004444 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004445 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004446
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004447 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004448 struct data_cmd *cmd;
4449
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004450 drbd_thread_current_set_cpu(&connection->receiver);
4451 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004452 goto err_out;
4453
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004454 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004455 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004456 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004457 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004458 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004459 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004460
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004461 shs = cmd->pkt_size;
4462 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004463 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004464 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004465 goto err_out;
4466 }
4467
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004468 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004469 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004470 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004471 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004472 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004473 }
4474
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004475 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004476 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004477 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004478 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004479 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004480 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004481 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004482 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004483
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004484 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004485 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004486}
4487
/*
 * Tear down a lost connection: stop the asender, free the sockets, run
 * per-volume cleanup for every peer device, reset epoch bookkeeping, and
 * drive the connection state towards C_UNCONNECTED (or C_STANDALONE if an
 * administrative disconnect was requested).  No-op if already standalone.
 */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->asender);
	drbd_free_sock(connection);

	/* drbd_disconnected() may block, so we cannot stay inside the RCU
	 * read-side critical section across the call: take a kref on the
	 * device first, drop the RCU lock, do the work, then re-acquire it
	 * to continue the idr walk. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* If we are (or were) primary and the peer's disk state is unknown,
	 * try to fence/outdate the peer asynchronously. */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	/* Sample and advance the connection state under the request lock. */
	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	/* Administrative disconnect: go all the way to standalone. */
	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
4540
/*
 * Per-volume cleanup after connection loss: wait for in-flight peer requests
 * to drain, cancel resync bookkeeping, flush the sender workqueue, clear the
 * transfer log (unless suspended), and release deferred network pages.
 * Called from conn_disconnect() for every peer device.  Always returns 0.
 */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* Stop the resync timer, then run its handler once by hand so any
	 * pending work it would have triggered is processed now. */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn((unsigned long)device);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	/* Forget the peer's UUIDs; they must be re-exchanged on reconnect. */
	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
4624
4625/*
4626 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4627 * we can agree on is stored in agreed_pro_version.
4628 *
4629 * feature flags and the reserved array should be enough room for future
4630 * enhancements of the handshake protocol, and possible plugins...
4631 *
4632 * for now, they are expected to be zero, but ignored.
4633 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004634static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004635{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004636 struct drbd_socket *sock;
4637 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004638
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004639 sock = &connection->data;
4640 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004641 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004642 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004643 memset(p, 0, sizeof(*p));
4644 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4645 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004646 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004647}
4648
4649/*
4650 * return values:
4651 * 1 yes, we have a valid connection
4652 * 0 oops, did not work out, please try again
4653 * -1 peer talks different language,
4654 * no point in trying again, please go standalone.
4655 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Send our side of the handshake first.  Note the return-code
	 * convention (documented above): network-level failures return 0
	 * ("try again"), protocol mismatches return -1 ("give up"). */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* Convert in place from network byte order. */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* Very old peers sent only protocol_min; treat max==0 as "min only". */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* Reject if the version ranges do not overlap at all. */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* Agree on the highest version both sides support. */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4712
4713#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: cram-hmac
 * authentication cannot work, so fail permanently (-1 means "auth failed,
 * don't try again" — see the return-value convention of the real
 * implementation below).
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* Fix grammar of the user-facing message: "was build" -> "was built". */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4720#else
4721#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004722
4723/* Return value:
4724 1 - auth succeeded,
4725 0 - failed, try again (network error),
4726 -1 - auth failed, don't try again.
4727*/
4728
/*
 * CRAM-HMAC mutual authentication over the data socket.
 *
 * Protocol: we send a random challenge (P_AUTH_CHALLENGE), receive the
 * peer's challenge, send back HMAC(secret, peer_challenge) as
 * P_AUTH_RESPONSE, receive the peer's response, and compare it against
 * HMAC(secret, my_challenge) computed locally.
 *
 * Return value (see comment above):
 *    1 - auth succeeded
 *    0 - failed, try again (network error)
 *   -1 - auth failed, don't try again
 *
 * Note: rv is reused both as an error/result accumulator and as the final
 * return code; on every failure path it is explicitly set to 0 or -1 before
 * jumping to the common cleanup label.
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Copy the shared secret out under RCU; net_conf may be replaced. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* Send our challenge. */
	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* Receive the peer's challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* Bound the peer-controlled allocation size. */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* Compute HMAC(secret, peer's challenge) and send it back. */
	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	/* Receive the peer's response to our challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response, resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* Compute the expected answer HMAC(secret, my_challenge) locally. */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* NOTE(review): plain memcmp is not constant-time; a timing-safe
	 * comparison (crypto_memneq) would be preferable here. */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so unconditionally freeing is safe. */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
4890#endif
4891
Andreas Gruenbacher8fe60552011-07-22 11:04:36 +02004892int drbd_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004893{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004894 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004895 int h;
4896
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004897 drbd_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004898
4899 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004900 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004901 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004902 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004903 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004904 }
4905 if (h == -1) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004906 drbd_warn(connection, "Discarding network configuration.\n");
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004907 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004908 }
4909 } while (h == 0);
4910
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004911 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004912 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004913
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004914 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004915
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004916 drbd_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004917 return 0;
4918}
4919
4920/* ********* acknowledge sender ******** */
4921
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004922static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004923{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004924 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004925 int retcode = be32_to_cpu(p->retcode);
4926
4927 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004928 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004929 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004930 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004931 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004932 drbd_set_st_err_str(retcode), retcode);
4933 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004934 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004935
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004936 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004937}
4938
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004939static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004940{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004941 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004942 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004943 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004944 int retcode = be32_to_cpu(p->retcode);
4945
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004946 peer_device = conn_peer_device(connection, pi->vnr);
4947 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004948 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004949 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004950
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004951 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004952 D_ASSERT(device, connection->agreed_pro_version < 100);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004953 return got_conn_RqSReply(connection, pi);
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01004954 }
4955
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004956 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004957 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004958 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004959 set_bit(CL_ST_CHG_FAIL, &device->flags);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004960 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004961 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004962 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004963 wake_up(&device->state_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004964
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004965 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004966}
4967
/* The peer pinged us: answer with a ping ack, propagating the send result. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}
4973
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004974static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004975{
4976 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004977 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
4978 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
4979 wake_up(&connection->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004980
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004981 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004982}
4983
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004984static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004985{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004986 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004987 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004988 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004989 sector_t sector = be64_to_cpu(p->sector);
4990 int blksize = be32_to_cpu(p->blksize);
4991
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004992 peer_device = conn_peer_device(connection, pi->vnr);
4993 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004994 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004995 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004996
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004997 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004998
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004999 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005000
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005001 if (get_ldev(device)) {
5002 drbd_rs_complete_io(device, sector);
5003 drbd_set_in_sync(device, sector, blksize);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005004 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005005 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5006 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005007 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005008 dec_rs_pending(device);
5009 atomic_add(blksize >> 9, &device->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005010
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005011 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005012}
5013
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005014static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005015validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005016 struct rb_root *root, const char *func,
5017 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005018{
5019 struct drbd_request *req;
5020 struct bio_and_error m;
5021
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005022 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005023 req = find_request(device, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005024 if (unlikely(!req)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005025 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005026 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005027 }
5028 __req_mod(req, what, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005029 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005030
5031 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005032 complete_master_bio(device, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005033 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005034}
5035
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005036static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005037{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005038 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005039 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005040 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005041 sector_t sector = be64_to_cpu(p->sector);
5042 int blksize = be32_to_cpu(p->blksize);
5043 enum drbd_req_event what;
5044
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005045 peer_device = conn_peer_device(connection, pi->vnr);
5046 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005047 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005048 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005049
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005050 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005051
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005052 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005053 drbd_set_in_sync(device, sector, blksize);
5054 dec_rs_pending(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005055 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005056 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005057 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005058 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005059 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005060 break;
5061 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005062 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005063 break;
5064 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005065 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005066 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005067 case P_SUPERSEDED:
5068 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005069 break;
5070 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005071 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005072 break;
5073 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005074 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005075 }
5076
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005077 return validate_req_change_req_state(device, p->block_id, sector,
5078 &device->write_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005079 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005080}
5081
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005082static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005083{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005084 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005085 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005086 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005087 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005088 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005089 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005090
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005091 peer_device = conn_peer_device(connection, pi->vnr);
5092 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005093 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005094 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005095
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005096 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005097
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005098 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005099 dec_rs_pending(device);
5100 drbd_rs_failed_io(device, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005101 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005102 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005103
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005104 err = validate_req_change_req_state(device, p->block_id, sector,
5105 &device->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005106 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005107 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005108 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5109 The master bio might already be completed, therefore the
5110 request is no longer in the collision hash. */
5111 /* In Protocol B we might already have got a P_RECV_ACK
5112 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005113 drbd_set_out_of_sync(device, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005114 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005115 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005116}
5117
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005118static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005119{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005120 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005121 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005122 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005123 sector_t sector = be64_to_cpu(p->sector);
5124
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005125 peer_device = conn_peer_device(connection, pi->vnr);
5126 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005127 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005128 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005129
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005130 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005131
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005132 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005133 (unsigned long long)sector, be32_to_cpu(p->blksize));
5134
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005135 return validate_req_change_req_state(device, p->block_id, sector,
5136 &device->read_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005137 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005138}
5139
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005140static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005141{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005142 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005143 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005144 sector_t sector;
5145 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005146 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005147
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005148 peer_device = conn_peer_device(connection, pi->vnr);
5149 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005150 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005151 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005152
5153 sector = be64_to_cpu(p->sector);
5154 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005155
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005156 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005157
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005158 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005159
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005160 if (get_ldev_if_state(device, D_FAILED)) {
5161 drbd_rs_complete_io(device, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005162 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005163 case P_NEG_RS_DREPLY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005164 drbd_rs_failed_io(device, sector, size);
Philipp Reisnerd612d302010-12-27 10:53:28 +01005165 case P_RS_CANCEL:
5166 break;
5167 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005168 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005169 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005170 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005171 }
5172
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005173 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005174}
5175
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005176static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005177{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005178 struct p_barrier_ack *p = pi->data;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005179 struct drbd_peer_device *peer_device;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005180 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005181
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005182 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005183
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005184 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005185 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5186 struct drbd_device *device = peer_device->device;
5187
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005188 if (device->state.conn == C_AHEAD &&
5189 atomic_read(&device->ap_in_flight) == 0 &&
5190 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5191 device->start_resync_timer.expires = jiffies + HZ;
5192 add_timer(&device->start_resync_timer);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005193 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005194 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005195 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005196
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005197 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005198}
5199
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005200static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005201{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005202 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005203 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005204 struct p_block_ack *p = pi->data;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005205 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005206 sector_t sector;
5207 int size;
5208
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005209 peer_device = conn_peer_device(connection, pi->vnr);
5210 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005211 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005212 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005213
Philipp Reisnerb411b362009-09-25 16:07:19 -07005214 sector = be64_to_cpu(p->sector);
5215 size = be32_to_cpu(p->blksize);
5216
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005217 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005218
5219 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005220 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005221 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005222 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005223
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005224 if (!get_ldev(device))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005225 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005226
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005227 drbd_rs_complete_io(device, sector);
5228 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005229
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005230 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005231
5232 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005233 if ((device->ov_left & 0x200) == 0x200)
5234 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005235
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005236 if (device->ov_left == 0) {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005237 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5238 if (dw) {
5239 dw->w.cb = w_ov_finished;
5240 dw->device = device;
5241 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005242 } else {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005243 drbd_err(device, "kmalloc(dw) failed.");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005244 ov_out_of_sync_print(device);
5245 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005246 }
5247 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005248 put_ldev(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005249 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005250}
5251
/* Consume and ignore a packet we do not act on (used for P_DELAY_PROBE). */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}
5256
/*
 * Drain the completed peer requests (done_ee) of every volume on this
 * connection.  Loops until no device has pending entries left; returns 1 if
 * draining any device failed, 0 otherwise.
 */
static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		/* Drop the "kick me" flag and any pending signal before we
		 * look at the work, so a concurrent kick is not lost. */
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			/* drbd_finish_peer_reqs() may block, so pin the
			 * device with a kref and leave the RCU read section
			 * for the duration of the call. */
			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		/* Re-check under the request lock whether more completions
		 * arrived while we were processing; if so, go around again. */
		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}
5293
/* Dispatch entry for one meta-socket (asender) packet type. */
struct asender_cmd {
	size_t pkt_size;	/* payload size expected for this packet type */
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5298
/* Packet-command -> handler dispatch table for the asender thread.
 * Note several commands share a handler: all positive write acks go through
 * got_BlockAck, and P_RS_CANCEL reuses got_NegRSDReply. */
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005318
5319int drbd_asender(struct drbd_thread *thi)
5320{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005321 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005322 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005323 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005324 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005325 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005326 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005327 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005328 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005329 bool ping_timeout_active = false;
5330 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005331 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005332 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005333
	/* Try to run the asender thread with real-time priority; failure is
	 * non-fatal, we merely log it and continue at normal priority. */
	rv = sched_setscheduler(current, SCHED_RR, &param);
	if (rv < 0)
		drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);

	/* Main loop: service the meta-data socket (pings, acks) until the
	 * thread is asked to stop. */
	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		/* Snapshot the tunables we need from net_conf under RCU; the
		 * configuration may be replaced concurrently. */
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		ping_timeo = nc->ping_timeo;
		tcp_cork = nc->tcp_cork;
		ping_int = nc->ping_int;
		rcu_read_unlock();

		/* A ping was requested (e.g. by a timeout elsewhere): send it
		 * and arm the shorter ping-ack receive timeout. */
		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
			if (drbd_send_ping(connection)) {
				drbd_err(connection, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			/* ping_timeo is in centi-seconds -> jiffies */
			connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
			ping_timeout_active = true;
		}

		/* TODO: conditionally cork; it may hurt latency if we cork without
		   much to send */
		if (tcp_cork)
			drbd_tcp_cork(connection->meta.socket);
		/* Flush any pending acks for completed peer requests while the
		 * socket is (possibly) corked, so they coalesce into few frames. */
		if (connection_finish_peer_reqs(connection)) {
			drbd_err(connection, "connection_finish_peer_reqs() failed\n");
			goto reconnect;
		}
		/* but unconditionally uncork unless disabled */
		if (tcp_cork)
			drbd_tcp_uncork(connection->meta.socket);

		/* short circuit, recv_msg would return EINTR anyways. */
		if (signal_pending(current))
			continue;

		/* Blocking receive (bounded by sk_rcvtimeo) of the remainder of
		 * the current header or packet payload. */
		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
		clear_bit(SIGNAL_ASENDER, &connection->flags);

		flush_signals(current);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS  (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			/* Partial or full receive: advance the cursor. */
			received += rv;
			buf	 += rv;
		} else if (rv == 0) {
			/* Peer closed the meta socket.  If we initiated the
			 * disconnect ourselves, wait (bounded by the ping
			 * timeout) for the state machine to leave the
			 * connected states and exit the loop quietly. */
			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
				long t;
				rcu_read_lock();
				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
				rcu_read_unlock();

				t = wait_event_timeout(connection->ping_wait,
						       connection->cstate < C_WF_REPORT_PARAMS,
						       t);
				if (t)
					break;
			}
			drbd_err(connection, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(connection->last_received,
				jiffies - connection->meta.socket->sk->sk_rcvtimeo))
				continue;
			/* Timed out while a ping-ack was due: the peer is gone. */
			if (ping_timeout_active) {
				drbd_err(connection, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			/* Idle timeout expired: probe the peer with a ping on
			 * the next loop iteration. */
			set_bit(SEND_PING, &connection->flags);
			continue;
		} else if (rv == -EINTR) {
			continue;
		} else {
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		/* Full header received and no command decoded yet: decode it
		 * and compute how many bytes the complete packet needs. */
		if (received == expect && cmd == NULL) {
			if (decode_header(connection, connection->meta.rbuf, &pi))
				goto reconnect;
			/* NOTE(review): the address &asender_tbl[pi.cmd] is
			 * computed before pi.cmd is bounds-checked below; the
			 * out-of-range element is never dereferenced thanks to
			 * the short-circuit ||, but forming the address is
			 * technically out of bounds — consider checking first. */
			cmd = &asender_tbl[pi.cmd];
			if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
					 cmdname(pi.cmd), pi.cmd);
				goto disconnect;
			}
			expect = header_size + cmd->pkt_size;
			/* All asender packets have a fixed size; anything else
			 * indicates protocol corruption or mismatch. */
			if (pi.size != expect - header_size) {
				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
					pi.cmd, pi.size);
				goto reconnect;
			}
		}
		/* Complete packet received: dispatch to its handler. */
		if (received == expect) {
			bool err;

			err = cmd->fn(connection, &pi);
			if (err) {
				drbd_err(connection, "%pf failed\n", cmd->fn);
				goto reconnect;
			}

			/* Any complete packet proves the peer is alive. */
			connection->last_received = jiffies;

			if (cmd == &asender_tbl[P_PING_ACK]) {
				/* restore idle timeout */
				connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
				ping_timeout_active = false;
			}

			/* Reset the receive state for the next packet. */
			buf = connection->meta.rbuf;
			received = 0;
			expect	 = header_size;
			cmd	 = NULL;
		}
	}

	/* Error funnels: the if (0) { label: } idiom keeps these blocks
	 * unreachable by fall-through; they are entered only via goto. */
	if (0) {
reconnect:
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		conn_md_sync(connection);
	}
	if (0) {
disconnect:
		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	clear_bit(SIGNAL_ASENDER, &connection->flags);

	drbd_info(connection, "asender terminated\n");

	return 0;
}