blob: 1b0a2be24f39edc8e597ed4348545e08cb025c9c [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
Fabian Frederick7e5fec32016-06-14 00:26:35 +020028#include <linux/uaccess.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070029#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Ingo Molnarae7e81c2017-02-01 18:07:51 +010039#include <uapi/linux/sched/types.h>
Ingo Molnar174cd4b2017-02-02 19:15:33 +010040#include <linux/sched/signal.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070041#include <linux/pkt_sched.h>
42#define __KERNEL_SYSCALLS__
43#include <linux/unistd.h>
44#include <linux/vmalloc.h>
45#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070046#include <linux/string.h>
47#include <linux/scatterlist.h>
48#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020049#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070050#include "drbd_req.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070051#include "drbd_vli.h"
52
Lars Ellenberg9104d312016-06-14 00:26:31 +020053#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME)
Lars Ellenberg20c68fd2014-04-28 18:43:25 +020054
Philipp Reisner77351055b2011-02-07 17:24:26 +010055struct packet_info {
56 enum drbd_packet cmd;
Andreas Gruenbachere2857212011-03-25 00:57:38 +010057 unsigned int size;
58 unsigned int vnr;
Andreas Gruenbachere6589832011-03-30 12:54:42 +020059 void *data;
Philipp Reisner77351055b2011-02-07 17:24:26 +010060};
61
Philipp Reisnerb411b362009-09-25 16:07:19 -070062enum finish_epoch {
63 FE_STILL_LIVE,
64 FE_DESTROYED,
65 FE_RECYCLED,
66};
67
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020068static int drbd_do_features(struct drbd_connection *connection);
69static int drbd_do_auth(struct drbd_connection *connection);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +020070static int drbd_disconnected(struct drbd_peer_device *);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +020071static void conn_wait_active_ee_empty(struct drbd_connection *connection);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020072static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010073static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070074
Philipp Reisnerb411b362009-09-25 16:07:19 -070075
76#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
77
Lars Ellenberg45bb9122010-05-14 17:10:48 +020078/*
79 * some helper functions to deal with single linked page lists,
80 * page->private being our "next" pointer.
81 */
82
83/* If at least n pages are linked at head, get n pages off.
84 * Otherwise, don't modify head, and return NULL.
85 * Locking is the responsibility of the caller.
86 */
87static struct page *page_chain_del(struct page **head, int n)
88{
89 struct page *page;
90 struct page *tmp;
91
92 BUG_ON(!n);
93 BUG_ON(!head);
94
95 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020096
97 if (!page)
98 return NULL;
99
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200100 while (page) {
101 tmp = page_chain_next(page);
102 if (--n == 0)
103 break; /* found sufficient pages */
104 if (tmp == NULL)
105 /* insufficient pages, don't use any of them. */
106 return NULL;
107 page = tmp;
108 }
109
110 /* add end of list marker for the returned list */
111 set_page_private(page, 0);
112 /* actual return value, and adjustment of head */
113 page = *head;
114 *head = tmp;
115 return page;
116}
117
118/* may be used outside of locks to find the tail of a (usually short)
119 * "private" page chain, before adding it back to a global chain head
120 * with page_chain_add() under a spinlock. */
121static struct page *page_chain_tail(struct page *page, int *len)
122{
123 struct page *tmp;
124 int i = 1;
125 while ((tmp = page_chain_next(page)))
126 ++i, page = tmp;
127 if (len)
128 *len = i;
129 return page;
130}
131
132static int page_chain_free(struct page *page)
133{
134 struct page *tmp;
135 int i = 0;
136 page_chain_for_each_safe(page, tmp) {
137 put_page(page);
138 ++i;
139 }
140 return i;
141}
142
143static void page_chain_add(struct page **head,
144 struct page *chain_first, struct page *chain_last)
145{
146#if 1
147 struct page *tmp;
148 tmp = page_chain_tail(chain_first, NULL);
149 BUG_ON(tmp != chain_last);
150#endif
151
152 /* add chain to head */
153 set_page_private(chain_last, (unsigned long)*head);
154 *head = chain_first;
155}
156
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200157static struct page *__drbd_alloc_pages(struct drbd_device *device,
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200158 unsigned int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700159{
160 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200161 struct page *tmp = NULL;
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200162 unsigned int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700163
164 /* Yes, testing drbd_pp_vacant outside the lock is racy.
165 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200166 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700167 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200168 page = page_chain_del(&drbd_pp_pool, number);
169 if (page)
170 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700171 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200172 if (page)
173 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700174 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200175
Philipp Reisnerb411b362009-09-25 16:07:19 -0700176 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
177 * "criss-cross" setup, that might cause write-out on some other DRBD,
178 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200179 for (i = 0; i < number; i++) {
180 tmp = alloc_page(GFP_TRY);
181 if (!tmp)
182 break;
183 set_page_private(tmp, (unsigned long)page);
184 page = tmp;
185 }
186
187 if (i == number)
188 return page;
189
190 /* Not enough pages immediately available this time.
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200191 * No need to jump around here, drbd_alloc_pages will retry this
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200192 * function "soon". */
193 if (page) {
194 tmp = page_chain_tail(page, NULL);
195 spin_lock(&drbd_pp_lock);
196 page_chain_add(&drbd_pp_pool, page, tmp);
197 drbd_pp_vacant += i;
198 spin_unlock(&drbd_pp_lock);
199 }
200 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700201}
202
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200203static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200204 struct list_head *to_be_freed)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200206 struct drbd_peer_request *peer_req, *tmp;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700207
208 /* The EEs are always appended to the end of the list. Since
209 they are sent in order over the wire, they have to finish
210 in order. As soon as we see the first not finished we can
211 stop to examine the list... */
212
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200213 list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200214 if (drbd_peer_req_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700215 break;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200216 list_move(&peer_req->w.list, to_be_freed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700217 }
218}
219
Philipp Reisner668700b2015-03-16 16:08:29 +0100220static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700221{
222 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100223 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700224
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200225 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200226 reclaim_finished_net_peer_reqs(device, &reclaimed);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200227 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200228 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200229 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700230}
231
Philipp Reisner668700b2015-03-16 16:08:29 +0100232static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
233{
234 struct drbd_peer_device *peer_device;
235 int vnr;
236
237 rcu_read_lock();
238 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
239 struct drbd_device *device = peer_device->device;
240 if (!atomic_read(&device->pp_in_use_by_net))
241 continue;
242
243 kref_get(&device->kref);
244 rcu_read_unlock();
245 drbd_reclaim_net_peer_reqs(device);
246 kref_put(&device->kref, drbd_destroy_device);
247 rcu_read_lock();
248 }
249 rcu_read_unlock();
250}
251
Philipp Reisnerb411b362009-09-25 16:07:19 -0700252/**
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200253 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200254 * @device: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200255 * @number: number of pages requested
256 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700257 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200258 * Tries to allocate number pages, first from our own page pool, then from
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200259 * the kernel.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200260 * Possibly retry until DRBD frees sufficient pages somewhere else.
261 *
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200262 * If this allocation would exceed the max_buffers setting, we throttle
263 * allocation (schedule_timeout) to give the system some room to breathe.
264 *
265 * We do not use max-buffers as hard limit, because it could lead to
266 * congestion and further to a distributed deadlock during online-verify or
267 * (checksum based) resync, if the max-buffers, socket buffer sizes and
268 * resync-rate settings are mis-configured.
269 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200270 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700271 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200272struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200273 bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700274{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200275 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700276 struct page *page = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200277 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700278 DEFINE_WAIT(wait);
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200279 unsigned int mxb;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700280
Philipp Reisner44ed1672011-04-19 17:10:19 +0200281 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200282 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200283 mxb = nc ? nc->max_buffers : 1000000;
284 rcu_read_unlock();
285
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200286 if (atomic_read(&device->pp_in_use) < mxb)
287 page = __drbd_alloc_pages(device, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700288
Philipp Reisner668700b2015-03-16 16:08:29 +0100289 /* Try to keep the fast path fast, but occasionally we need
290 * to reclaim the pages we lended to the network stack. */
291 if (page && atomic_read(&device->pp_in_use_by_net) > 512)
292 drbd_reclaim_net_peer_reqs(device);
293
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200294 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700295 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
296
Philipp Reisner668700b2015-03-16 16:08:29 +0100297 drbd_reclaim_net_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700298
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200299 if (atomic_read(&device->pp_in_use) < mxb) {
300 page = __drbd_alloc_pages(device, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700301 if (page)
302 break;
303 }
304
305 if (!retry)
306 break;
307
308 if (signal_pending(current)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200309 drbd_warn(device, "drbd_alloc_pages interrupted!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700310 break;
311 }
312
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200313 if (schedule_timeout(HZ/10) == 0)
314 mxb = UINT_MAX;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700315 }
316 finish_wait(&drbd_pp_wait, &wait);
317
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200318 if (page)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200319 atomic_add(number, &device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700320 return page;
321}
322
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200323/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200324 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200325 * Either links the page chain back to the global pool,
326 * or returns all pages to the system. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200327static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700328{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200329 atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700330 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200331
Lars Ellenberga73ff322012-06-25 19:15:38 +0200332 if (page == NULL)
333 return;
334
Philipp Reisner81a5d602011-02-22 19:53:16 -0500335 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200336 i = page_chain_free(page);
337 else {
338 struct page *tmp;
339 tmp = page_chain_tail(page, &i);
340 spin_lock(&drbd_pp_lock);
341 page_chain_add(&drbd_pp_pool, page, tmp);
342 drbd_pp_vacant += i;
343 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700344 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200345 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200346 if (i < 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200347 drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
Lars Ellenberg435f0742010-09-06 12:30:25 +0200348 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700349 wake_up(&drbd_pp_wait);
350}
351
352/*
353You need to hold the req_lock:
354 _drbd_wait_ee_list_empty()
355
356You must not have the req_lock:
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200357 drbd_free_peer_req()
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200358 drbd_alloc_peer_req()
Andreas Gruenbacher7721f562011-04-06 17:14:02 +0200359 drbd_free_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700360 drbd_ee_fix_bhs()
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200361 drbd_finish_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700362 drbd_clear_done_ee()
363 drbd_wait_ee_list_empty()
364*/
365
Lars Ellenberg9104d312016-06-14 00:26:31 +0200366/* normal: payload_size == request size (bi_size)
367 * w_same: payload_size == logical_block_size
368 * trim: payload_size == 0 */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100369struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200370drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Lars Ellenberg9104d312016-06-14 00:26:31 +0200371 unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700372{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200373 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100374 struct drbd_peer_request *peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +0200375 struct page *page = NULL;
Lars Ellenberg9104d312016-06-14 00:26:31 +0200376 unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700377
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200378 if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700379 return NULL;
380
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100381 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
382 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700383 if (!(gfp_mask & __GFP_NOWARN))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200384 drbd_err(device, "%s: allocation failed\n", __func__);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700385 return NULL;
386 }
387
Lars Ellenberg9104d312016-06-14 00:26:31 +0200388 if (nr_pages) {
Mel Gormand0164ad2015-11-06 16:28:21 -0800389 page = drbd_alloc_pages(peer_device, nr_pages,
390 gfpflags_allow_blocking(gfp_mask));
Lars Ellenberga73ff322012-06-25 19:15:38 +0200391 if (!page)
392 goto fail;
393 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700394
Lars Ellenbergc5a2c152014-05-08 10:08:05 +0200395 memset(peer_req, 0, sizeof(*peer_req));
396 INIT_LIST_HEAD(&peer_req->w.list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100397 drbd_clear_interval(&peer_req->i);
Lars Ellenberg9104d312016-06-14 00:26:31 +0200398 peer_req->i.size = request_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100399 peer_req->i.sector = sector;
Lars Ellenbergc5a2c152014-05-08 10:08:05 +0200400 peer_req->submit_jif = jiffies;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200401 peer_req->peer_device = peer_device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100402 peer_req->pages = page;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100403 /*
404 * The block_id is opaque to the receiver. It is not endianness
405 * converted, and sent back to the sender unchanged.
406 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100407 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700408
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100409 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700410
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200411 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100412 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700413 return NULL;
414}
415
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200416void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100417 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700418{
Lars Ellenberg21ae5d72014-05-05 23:42:24 +0200419 might_sleep();
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100420 if (peer_req->flags & EE_HAS_DIGEST)
421 kfree(peer_req->digest);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200422 drbd_free_pages(device, peer_req->pages, is_net);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200423 D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
424 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Lars Ellenberg21ae5d72014-05-05 23:42:24 +0200425 if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
426 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
427 drbd_al_complete_io(device, &peer_req->i);
428 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100429 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700430}
431
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200432int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700433{
434 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100435 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700436 int count = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200437 int is_net = list == &device->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700438
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200439 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700440 list_splice_init(list, &work_list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200441 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700442
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200443 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200444 __drbd_free_peer_req(device, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700445 count++;
446 }
447 return count;
448}
449
Philipp Reisnerb411b362009-09-25 16:07:19 -0700450/*
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200451 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700452 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200453static int drbd_finish_peer_reqs(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700454{
455 LIST_HEAD(work_list);
456 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100457 struct drbd_peer_request *peer_req, *t;
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100458 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700459
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200460 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200461 reclaim_finished_net_peer_reqs(device, &reclaimed);
462 list_splice_init(&device->done_ee, &work_list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200463 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700464
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200465 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200466 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700467
468 /* possible callbacks here:
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +0200469 * e_end_block, and e_end_resync_block, e_send_superseded.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700470 * all ignore the last argument.
471 */
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200472 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100473 int err2;
474
Philipp Reisnerb411b362009-09-25 16:07:19 -0700475 /* list_del not necessary, next/prev members not touched */
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200476 err2 = peer_req->w.cb(&peer_req->w, !!err);
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100477 if (!err)
478 err = err2;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200479 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700480 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200481 wake_up(&device->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700482
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100483 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700484}
485
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200486static void _drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200487 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700488{
489 DEFINE_WAIT(wait);
490
491 /* avoids spin_lock/unlock
492 * and calling prepare_to_wait in the fast path */
493 while (!list_empty(head)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200494 prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200495 spin_unlock_irq(&device->resource->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100496 io_schedule();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200497 finish_wait(&device->ee_wait, &wait);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200498 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700499 }
500}
501
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200502static void drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200503 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700504{
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200505 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200506 _drbd_wait_ee_list_empty(device, head);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200507 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700508}
509
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100510static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700511{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700512 struct kvec iov = {
513 .iov_base = buf,
514 .iov_len = size,
515 };
516 struct msghdr msg = {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700517 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
518 };
Al Virof730c842014-02-08 21:07:38 -0500519 return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700520}
521
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200522static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700523{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700524 int rv;
525
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200526 rv = drbd_recv_short(connection->data.socket, buf, size, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700527
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200528 if (rv < 0) {
529 if (rv == -ECONNRESET)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200530 drbd_info(connection, "sock was reset by peer\n");
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200531 else if (rv != -ERESTARTSYS)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200532 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200533 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200534 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200535 long t;
536 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200537 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200538 rcu_read_unlock();
539
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200540 t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200541
Philipp Reisner599377a2012-08-17 14:50:22 +0200542 if (t)
543 goto out;
544 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200545 drbd_info(connection, "sock was shut down by peer\n");
Philipp Reisner599377a2012-08-17 14:50:22 +0200546 }
547
Philipp Reisnerb411b362009-09-25 16:07:19 -0700548 if (rv != size)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200549 conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700550
Philipp Reisner599377a2012-08-17 14:50:22 +0200551out:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700552 return rv;
553}
554
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200555static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100556{
557 int err;
558
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200559 err = drbd_recv(connection, buf, size);
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100560 if (err != size) {
561 if (err >= 0)
562 err = -EIO;
563 } else
564 err = 0;
565 return err;
566}
567
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200568static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100569{
570 int err;
571
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200572 err = drbd_recv_all(connection, buf, size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100573 if (err && !signal_pending(current))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200574 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100575 return err;
576}
577
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200578/* quoting tcp(7):
579 * On individual connections, the socket buffer size must be set prior to the
580 * listen(2) or connect(2) calls in order to have it take effect.
581 * This is our wrapper to do so.
582 */
583static void drbd_setbufsize(struct socket *sock, unsigned int snd,
584 unsigned int rcv)
585{
586 /* open coded SO_SNDBUF, SO_RCVBUF */
587 if (snd) {
588 sock->sk->sk_sndbuf = snd;
589 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
590 }
591 if (rcv) {
592 sock->sk->sk_rcvbuf = rcv;
593 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
594 }
595}
596
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200597static struct socket *drbd_try_connect(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700598{
599 const char *what;
600 struct socket *sock;
601 struct sockaddr_in6 src_in6;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200602 struct sockaddr_in6 peer_in6;
603 struct net_conf *nc;
604 int err, peer_addr_len, my_addr_len;
Andreas Gruenbacher69ef82d2011-05-11 14:34:35 +0200605 int sndbuf_size, rcvbuf_size, connect_int;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700606 int disconnect_on_error = 1;
607
Philipp Reisner44ed1672011-04-19 17:10:19 +0200608 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200609 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200610 if (!nc) {
611 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700612 return NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200613 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200614 sndbuf_size = nc->sndbuf_size;
615 rcvbuf_size = nc->rcvbuf_size;
Andreas Gruenbacher69ef82d2011-05-11 14:34:35 +0200616 connect_int = nc->connect_int;
Andreas Gruenbacher089c0752011-06-14 18:28:09 +0200617 rcu_read_unlock();
Philipp Reisner44ed1672011-04-19 17:10:19 +0200618
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200619 my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
620 memcpy(&src_in6, &connection->my_addr, my_addr_len);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200621
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200622 if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
Philipp Reisner44ed1672011-04-19 17:10:19 +0200623 src_in6.sin6_port = 0;
624 else
625 ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
626
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200627 peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
628 memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700629
630 what = "sock_create_kern";
Eric W. Biedermaneeb1bd52015-05-08 21:08:05 -0500631 err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
Philipp Reisner44ed1672011-04-19 17:10:19 +0200632 SOCK_STREAM, IPPROTO_TCP, &sock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700633 if (err < 0) {
634 sock = NULL;
635 goto out;
636 }
637
638 sock->sk->sk_rcvtimeo =
Andreas Gruenbacher69ef82d2011-05-11 14:34:35 +0200639 sock->sk->sk_sndtimeo = connect_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200640 drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700641
642 /* explicitly bind to the configured IP as source IP
643 * for the outgoing connections.
644 * This is needed for multihomed hosts and to be
645 * able to use lo: interfaces for drbd.
646 * Make sure to use 0 as port number, so linux selects
647 * a free one dynamically.
648 */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700649 what = "bind before connect";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200650 err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700651 if (err < 0)
652 goto out;
653
654 /* connect may fail, peer not yet available.
655 * stay C_WF_CONNECTION, don't go Disconnecting! */
656 disconnect_on_error = 0;
657 what = "connect";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200658 err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700659
660out:
661 if (err < 0) {
662 if (sock) {
663 sock_release(sock);
664 sock = NULL;
665 }
666 switch (-err) {
667 /* timeout, busy, signal pending */
668 case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
669 case EINTR: case ERESTARTSYS:
670 /* peer not (yet) available, network problem */
671 case ECONNREFUSED: case ENETUNREACH:
672 case EHOSTDOWN: case EHOSTUNREACH:
673 disconnect_on_error = 0;
674 break;
675 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200676 drbd_err(connection, "%s failed, err = %d\n", what, err);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700677 }
678 if (disconnect_on_error)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200679 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700680 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200681
Philipp Reisnerb411b362009-09-25 16:07:19 -0700682 return sock;
683}
684
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200685struct accept_wait_data {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200686 struct drbd_connection *connection;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200687 struct socket *s_listen;
688 struct completion door_bell;
689 void (*original_sk_state_change)(struct sock *sk);
690
691};
692
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200693static void drbd_incoming_connection(struct sock *sk)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700694{
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200695 struct accept_wait_data *ad = sk->sk_user_data;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200696 void (*state_change)(struct sock *sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200697
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200698 state_change = ad->original_sk_state_change;
699 if (sk->sk_state == TCP_ESTABLISHED)
700 complete(&ad->door_bell);
701 state_change(sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200702}
703
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200704static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700705{
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200706 int err, sndbuf_size, rcvbuf_size, my_addr_len;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200707 struct sockaddr_in6 my_addr;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200708 struct socket *s_listen;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200709 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700710 const char *what;
711
Philipp Reisner44ed1672011-04-19 17:10:19 +0200712 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200713 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200714 if (!nc) {
715 rcu_read_unlock();
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200716 return -EIO;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200717 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200718 sndbuf_size = nc->sndbuf_size;
719 rcvbuf_size = nc->rcvbuf_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200720 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700721
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200722 my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
723 memcpy(&my_addr, &connection->my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700724
725 what = "sock_create_kern";
Eric W. Biedermaneeb1bd52015-05-08 21:08:05 -0500726 err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200727 SOCK_STREAM, IPPROTO_TCP, &s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700728 if (err) {
729 s_listen = NULL;
730 goto out;
731 }
732
Philipp Reisner98683652012-11-09 14:18:43 +0100733 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200734 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700735
736 what = "bind before listen";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200737 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738 if (err < 0)
739 goto out;
740
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200741 ad->s_listen = s_listen;
742 write_lock_bh(&s_listen->sk->sk_callback_lock);
743 ad->original_sk_state_change = s_listen->sk->sk_state_change;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200744 s_listen->sk->sk_state_change = drbd_incoming_connection;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200745 s_listen->sk->sk_user_data = ad;
746 write_unlock_bh(&s_listen->sk->sk_callback_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700747
Philipp Reisner2820fd32012-07-12 10:22:48 +0200748 what = "listen";
749 err = s_listen->ops->listen(s_listen, 5);
750 if (err < 0)
751 goto out;
752
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200753 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700754out:
755 if (s_listen)
756 sock_release(s_listen);
757 if (err < 0) {
758 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200759 drbd_err(connection, "%s failed, err = %d\n", what, err);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200760 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700761 }
762 }
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200763
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200764 return -EIO;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200765}
766
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200767static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
768{
769 write_lock_bh(&sk->sk_callback_lock);
770 sk->sk_state_change = ad->original_sk_state_change;
771 sk->sk_user_data = NULL;
772 write_unlock_bh(&sk->sk_callback_lock);
773}
774
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200775static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200776{
777 int timeo, connect_int, err = 0;
778 struct socket *s_estab = NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200779 struct net_conf *nc;
780
781 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200782 nc = rcu_dereference(connection->net_conf);
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200783 if (!nc) {
784 rcu_read_unlock();
785 return NULL;
786 }
787 connect_int = nc->connect_int;
788 rcu_read_unlock();
789
790 timeo = connect_int * HZ;
Akinobu Mita38b682b22013-04-29 16:21:31 -0700791 /* 28.5% random jitter */
792 timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200793
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200794 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
795 if (err <= 0)
796 return NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200797
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200798 err = kernel_accept(ad->s_listen, &s_estab, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700799 if (err < 0) {
800 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200801 drbd_err(connection, "accept failed, err = %d\n", err);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200802 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700803 }
804 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700805
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200806 if (s_estab)
807 unregister_state_change(s_estab->sk, ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700808
809 return s_estab;
810}
811
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200812static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700813
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200814static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200815 enum drbd_packet cmd)
816{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200817 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200818 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200819 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700820}
821
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200822static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700823{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200824 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200825 struct packet_info pi;
Philipp Reisner4920e372014-03-18 14:40:13 +0100826 struct net_conf *nc;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200827 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700828
Philipp Reisner4920e372014-03-18 14:40:13 +0100829 rcu_read_lock();
830 nc = rcu_dereference(connection->net_conf);
831 if (!nc) {
832 rcu_read_unlock();
833 return -EIO;
834 }
835 sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
836 rcu_read_unlock();
837
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200838 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200839 if (err != header_size) {
840 if (err >= 0)
841 err = -EIO;
842 return err;
843 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200844 err = decode_header(connection, connection->data.rbuf, &pi);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200845 if (err)
846 return err;
847 return pi.cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700848}
849
850/**
851 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700852 * @sock: pointer to the pointer to the socket.
853 */
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100854static bool drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700855{
856 int rr;
857 char tb[4];
858
859 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100860 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700861
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100862 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700863
864 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100865 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700866 } else {
867 sock_release(*sock);
868 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100869 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700870 }
871}
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100872
873static bool connection_established(struct drbd_connection *connection,
874 struct socket **sock1,
875 struct socket **sock2)
876{
877 struct net_conf *nc;
878 int timeout;
879 bool ok;
880
881 if (!*sock1 || !*sock2)
882 return false;
883
884 rcu_read_lock();
885 nc = rcu_dereference(connection->net_conf);
886 timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
887 rcu_read_unlock();
888 schedule_timeout_interruptible(timeout);
889
890 ok = drbd_socket_okay(sock1);
891 ok = drbd_socket_okay(sock2) && ok;
892
893 return ok;
894}
895
Philipp Reisner2325eb62011-03-15 16:56:18 +0100896/* Gets called if a connection is established, or if a new minor gets created
897 in a connection */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200898int drbd_connected(struct drbd_peer_device *peer_device)
Philipp Reisner907599e2011-02-08 11:25:37 +0100899{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200900 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100901 int err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100902
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200903 atomic_set(&device->packet_seq, 0);
904 device->peer_seq = 0;
Philipp Reisner907599e2011-02-08 11:25:37 +0100905
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200906 device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
907 &peer_device->connection->cstate_mutex :
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200908 &device->own_state_mutex;
Philipp Reisner8410da8f02011-02-11 20:11:10 +0100909
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200910 err = drbd_send_sync_param(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100911 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200912 err = drbd_send_sizes(peer_device, 0, 0);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100913 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200914 err = drbd_send_uuids(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100915 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200916 err = drbd_send_current_state(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200917 clear_bit(USE_DEGR_WFC_T, &device->flags);
918 clear_bit(RESIZE_PENDING, &device->flags);
919 atomic_set(&device->ap_in_flight, 0);
920 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100921 return err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100922}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700923
924/*
925 * return values:
926 * 1 yes, we have a valid connection
927 * 0 oops, did not work out, please try again
928 * -1 peer talks different language,
929 * no point in trying again, please go standalone.
930 * -2 We do not have a network config...
931 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200932static int conn_connect(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700933{
Philipp Reisner7da35862011-12-19 22:42:56 +0100934 struct drbd_socket sock, msock;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +0200935 struct drbd_peer_device *peer_device;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200936 struct net_conf *nc;
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100937 int vnr, timeout, h;
938 bool discard_my_data, ok;
Philipp Reisner197296f2012-03-26 16:47:11 +0200939 enum drbd_state_rv rv;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200940 struct accept_wait_data ad = {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200941 .connection = connection,
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200942 .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
943 };
Philipp Reisnerb411b362009-09-25 16:07:19 -0700944
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200945 clear_bit(DISCONNECT_SENT, &connection->flags);
946 if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700947 return -2;
948
Philipp Reisner7da35862011-12-19 22:42:56 +0100949 mutex_init(&sock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200950 sock.sbuf = connection->data.sbuf;
951 sock.rbuf = connection->data.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100952 sock.socket = NULL;
953 mutex_init(&msock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200954 msock.sbuf = connection->meta.sbuf;
955 msock.rbuf = connection->meta.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100956 msock.socket = NULL;
957
Andreas Gruenbacher0916e0e2011-03-21 14:10:15 +0100958 /* Assume that the peer only understands protocol 80 until we know better. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200959 connection->agreed_pro_version = 80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700960
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200961 if (prepare_listen_socket(connection, &ad))
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200962 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700963
964 do {
Andreas Gruenbacher2bf89622011-03-28 16:33:12 +0200965 struct socket *s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700966
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200967 s = drbd_try_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700968 if (s) {
Philipp Reisner7da35862011-12-19 22:42:56 +0100969 if (!sock.socket) {
970 sock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200971 send_first_packet(connection, &sock, P_INITIAL_DATA);
Philipp Reisner7da35862011-12-19 22:42:56 +0100972 } else if (!msock.socket) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200973 clear_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100974 msock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200975 send_first_packet(connection, &msock, P_INITIAL_META);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700976 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200977 drbd_err(connection, "Logic error in conn_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700978 goto out_release_sockets;
979 }
980 }
981
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100982 if (connection_established(connection, &sock.socket, &msock.socket))
983 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700984
985retry:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200986 s = drbd_wait_for_connect(connection, &ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987 if (s) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200988 int fp = receive_first_packet(connection, s);
Philipp Reisner7da35862011-12-19 22:42:56 +0100989 drbd_socket_okay(&sock.socket);
990 drbd_socket_okay(&msock.socket);
Philipp Reisner92f14952012-08-01 11:41:01 +0200991 switch (fp) {
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200992 case P_INITIAL_DATA:
Philipp Reisner7da35862011-12-19 22:42:56 +0100993 if (sock.socket) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200994 drbd_warn(connection, "initial packet S crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100995 sock_release(sock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200996 sock.socket = s;
997 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700998 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100999 sock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001000 break;
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +02001001 case P_INITIAL_META:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001002 set_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +01001003 if (msock.socket) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001004 drbd_warn(connection, "initial packet M crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +01001005 sock_release(msock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +02001006 msock.socket = s;
1007 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001008 }
Philipp Reisner7da35862011-12-19 22:42:56 +01001009 msock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001010 break;
1011 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001012 drbd_warn(connection, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001013 sock_release(s);
Philipp Reisner80c6eed2012-08-01 14:53:39 +02001014randomize:
Akinobu Mita38b682b22013-04-29 16:21:31 -07001015 if (prandom_u32() & 1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001016 goto retry;
1017 }
1018 }
1019
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001020 if (connection->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001021 goto out_release_sockets;
1022 if (signal_pending(current)) {
1023 flush_signals(current);
1024 smp_rmb();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001025 if (get_t_state(&connection->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001026 goto out_release_sockets;
1027 }
1028
Philipp Reisner5d0b17f2014-03-18 14:24:35 +01001029 ok = connection_established(connection, &sock.socket, &msock.socket);
Philipp Reisnerb666dbf2012-07-26 14:12:59 +02001030 } while (!ok);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001031
Philipp Reisner7a426fd2012-07-12 14:22:37 +02001032 if (ad.s_listen)
1033 sock_release(ad.s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001034
Philipp Reisner98683652012-11-09 14:18:43 +01001035 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
1036 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001037
Philipp Reisner7da35862011-12-19 22:42:56 +01001038 sock.socket->sk->sk_allocation = GFP_NOIO;
1039 msock.socket->sk->sk_allocation = GFP_NOIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001040
Philipp Reisner7da35862011-12-19 22:42:56 +01001041 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
1042 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001043
Philipp Reisnerb411b362009-09-25 16:07:19 -07001044 /* NOT YET ...
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001045 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
Philipp Reisner7da35862011-12-19 22:42:56 +01001046 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Andreas Gruenbacher60381782011-03-28 17:05:50 +02001047 * first set it to the P_CONNECTION_FEATURES timeout,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001048 * which we set to 4x the configured ping_timeout. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001049 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001050 nc = rcu_dereference(connection->net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001051
Philipp Reisner7da35862011-12-19 22:42:56 +01001052 sock.socket->sk->sk_sndtimeo =
1053 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001054
Philipp Reisner7da35862011-12-19 22:42:56 +01001055 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001056 timeout = nc->timeout * HZ / 10;
Philipp Reisner08b165b2011-09-05 16:22:33 +02001057 discard_my_data = nc->discard_my_data;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001058 rcu_read_unlock();
1059
Philipp Reisner7da35862011-12-19 22:42:56 +01001060 msock.socket->sk->sk_sndtimeo = timeout;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001061
1062 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001063 * we use TCP_CORK where appropriate, though */
Philipp Reisner7da35862011-12-19 22:42:56 +01001064 drbd_tcp_nodelay(sock.socket);
1065 drbd_tcp_nodelay(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001066
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001067 connection->data.socket = sock.socket;
1068 connection->meta.socket = msock.socket;
1069 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001070
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001071 h = drbd_do_features(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001072 if (h <= 0)
1073 return h;
1074
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001075 if (connection->cram_hmac_tfm) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001076 /* drbd_request_state(device, NS(conn, WFAuth)); */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001077 switch (drbd_do_auth(connection)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +01001078 case -1:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001079 drbd_err(connection, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001080 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +01001081 case 0:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001082 drbd_err(connection, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01001083 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001084 }
1085 }
1086
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001087 connection->data.socket->sk->sk_sndtimeo = timeout;
1088 connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001089
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001090 if (drbd_send_protocol(connection) == -EOPNOTSUPP)
Philipp Reisner7e2455c2010-04-22 14:50:23 +02001091 return -1;
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001092
Philipp Reisner31007742014-04-28 18:43:12 +02001093 /* Prevent a race between resync-handshake and
1094 * being promoted to Primary.
1095 *
1096 * Grab and release the state mutex, so we know that any current
1097 * drbd_set_role() is finished, and any incoming drbd_set_role
1098 * will see the STATE_SENT flag, and wait for it to be cleared.
1099 */
1100 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1101 mutex_lock(peer_device->device->state_mutex);
1102
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001103 set_bit(STATE_SENT, &connection->flags);
Philipp Reisner197296f2012-03-26 16:47:11 +02001104
Philipp Reisner31007742014-04-28 18:43:12 +02001105 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1106 mutex_unlock(peer_device->device->state_mutex);
1107
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001108 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001109 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1110 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001111 kref_get(&device->kref);
Andreas Gruenbacher26ea8f92013-06-25 16:50:03 +02001112 rcu_read_unlock();
1113
Philipp Reisner08b165b2011-09-05 16:22:33 +02001114 if (discard_my_data)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001115 set_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001116 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001117 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001118
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001119 drbd_connected(peer_device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001120 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001121 rcu_read_lock();
1122 }
1123 rcu_read_unlock();
1124
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001125 rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1126 if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1127 clear_bit(STATE_SENT, &connection->flags);
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001128 return 0;
Philipp Reisnera1096a62012-04-06 12:07:34 +02001129 }
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001130
Philipp Reisner1c03e522015-03-16 15:01:00 +01001131 drbd_thread_start(&connection->ack_receiver);
Lars Ellenberg39e91a62015-03-24 10:40:26 +01001132 /* opencoded create_singlethread_workqueue(),
1133 * to be able to use format string arguments */
1134 connection->ack_sender =
1135 alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
Philipp Reisner668700b2015-03-16 16:08:29 +01001136 if (!connection->ack_sender) {
1137 drbd_err(connection, "Failed to create workqueue ack_sender\n");
1138 return 0;
1139 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001140
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001141 mutex_lock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001142 /* The discard_my_data flag is a single-shot modifier to the next
1143 * connection attempt, the handshake of which is now well underway.
1144 * No need for rcu style copying of the whole struct
1145 * just to clear a single value. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001146 connection->net_conf->discard_my_data = 0;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001147 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001148
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07001149 return h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001150
1151out_release_sockets:
Philipp Reisner7a426fd2012-07-12 14:22:37 +02001152 if (ad.s_listen)
1153 sock_release(ad.s_listen);
Philipp Reisner7da35862011-12-19 22:42:56 +01001154 if (sock.socket)
1155 sock_release(sock.socket);
1156 if (msock.socket)
1157 sock_release(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001158 return -1;
1159}
1160
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001161static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001162{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001163 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001164
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001165 if (header_size == sizeof(struct p_header100) &&
1166 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1167 struct p_header100 *h = header;
1168 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001169 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001170 return -EINVAL;
1171 }
1172 pi->vnr = be16_to_cpu(h->volume);
1173 pi->cmd = be16_to_cpu(h->command);
1174 pi->size = be32_to_cpu(h->length);
1175 } else if (header_size == sizeof(struct p_header95) &&
1176 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001177 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001178 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001179 pi->size = be32_to_cpu(h->length);
1180 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001181 } else if (header_size == sizeof(struct p_header80) &&
1182 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1183 struct p_header80 *h = header;
1184 pi->cmd = be16_to_cpu(h->command);
1185 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001186 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001187 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001188 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001189 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001190 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001191 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001192 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001193 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001194 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195}
1196
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001197static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001198{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001199 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001200 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001201
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001202 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001203 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001204 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001205
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001206 err = decode_header(connection, buffer, pi);
1207 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001209 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210}
1211
Lars Ellenbergf9ff0da2016-06-14 00:26:19 +02001212/* This is blkdev_issue_flush, but asynchronous.
1213 * We want to submit to all component volumes in parallel,
1214 * then wait for all completions.
1215 */
1216struct issue_flush_context {
1217 atomic_t pending;
1218 int error;
1219 struct completion done;
1220};
1221struct one_flush_context {
1222 struct drbd_device *device;
1223 struct issue_flush_context *ctx;
1224};
1225
1226void one_flush_endio(struct bio *bio)
1227{
1228 struct one_flush_context *octx = bio->bi_private;
1229 struct drbd_device *device = octx->device;
1230 struct issue_flush_context *ctx = octx->ctx;
1231
1232 if (bio->bi_error) {
1233 ctx->error = bio->bi_error;
1234 drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
1235 }
1236 kfree(octx);
1237 bio_put(bio);
1238
1239 clear_bit(FLUSH_PENDING, &device->flags);
1240 put_ldev(device);
1241 kref_put(&device->kref, drbd_destroy_device);
1242
1243 if (atomic_dec_and_test(&ctx->pending))
1244 complete(&ctx->done);
1245}
1246
1247static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
1248{
1249 struct bio *bio = bio_alloc(GFP_NOIO, 0);
1250 struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
1251 if (!bio || !octx) {
1252 drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
1253 /* FIXME: what else can I do now? disconnecting or detaching
1254 * really does not help to improve the state of the world, either.
1255 */
1256 kfree(octx);
1257 if (bio)
1258 bio_put(bio);
1259
1260 ctx->error = -ENOMEM;
1261 put_ldev(device);
1262 kref_put(&device->kref, drbd_destroy_device);
1263 return;
1264 }
1265
1266 octx->device = device;
1267 octx->ctx = ctx;
1268 bio->bi_bdev = device->ldev->backing_bdev;
1269 bio->bi_private = octx;
1270 bio->bi_end_io = one_flush_endio;
Christoph Hellwig70fd7612016-11-01 07:40:10 -06001271 bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
Lars Ellenbergf9ff0da2016-06-14 00:26:19 +02001272
1273 device->flush_jif = jiffies;
1274 set_bit(FLUSH_PENDING, &device->flags);
1275 atomic_inc(&ctx->pending);
1276 submit_bio(bio);
1277}
1278
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001279static void drbd_flush(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001280{
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001281 if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
Lars Ellenbergf9ff0da2016-06-14 00:26:19 +02001282 struct drbd_peer_device *peer_device;
1283 struct issue_flush_context ctx;
1284 int vnr;
1285
1286 atomic_set(&ctx.pending, 1);
1287 ctx.error = 0;
1288 init_completion(&ctx.done);
1289
Lars Ellenberg615e0872011-11-17 14:32:12 +01001290 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001291 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1292 struct drbd_device *device = peer_device->device;
1293
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001294 if (!get_ldev(device))
Lars Ellenberg615e0872011-11-17 14:32:12 +01001295 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001296 kref_get(&device->kref);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001297 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001298
Lars Ellenbergf9ff0da2016-06-14 00:26:19 +02001299 submit_one_flush(device, &ctx);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001300
1301 rcu_read_lock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001302 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001303 rcu_read_unlock();
Lars Ellenbergf9ff0da2016-06-14 00:26:19 +02001304
1305 /* Do we want to add a timeout,
1306 * if disk-timeout is set? */
1307 if (!atomic_dec_and_test(&ctx.pending))
1308 wait_for_completion(&ctx.done);
1309
1310 if (ctx.error) {
1311 /* would rather check on EOPNOTSUPP, but that is not reliable.
1312 * don't try again for ANY return value != 0
1313 * if (rv == -EOPNOTSUPP) */
1314 /* Any error is already reported by bio_endio callback. */
1315 drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
1316 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001317 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001318}
1319
1320/**
1321 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001322 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001323 * @epoch: Epoch object.
1324 * @ev: Epoch event.
1325 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001326static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001327 struct drbd_epoch *epoch,
1328 enum epoch_event ev)
1329{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001330 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001331 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001332 enum finish_epoch rv = FE_STILL_LIVE;
1333
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001334 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001335 do {
1336 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001337
1338 epoch_size = atomic_read(&epoch->epoch_size);
1339
1340 switch (ev & ~EV_CLEANUP) {
1341 case EV_PUT:
1342 atomic_dec(&epoch->active);
1343 break;
1344 case EV_GOT_BARRIER_NR:
1345 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001346 break;
1347 case EV_BECAME_LAST:
1348 /* nothing to do*/
1349 break;
1350 }
1351
Philipp Reisnerb411b362009-09-25 16:07:19 -07001352 if (epoch_size != 0 &&
1353 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001354 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001355 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001356 spin_unlock(&connection->epoch_lock);
1357 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1358 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001359 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001360#if 0
1361 /* FIXME: dec unacked on connection, once we have
1362 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001363 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001364 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001365#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001366
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001367 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001368 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1369 list_del(&epoch->list);
1370 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001371 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001372 kfree(epoch);
1373
1374 if (rv == FE_STILL_LIVE)
1375 rv = FE_DESTROYED;
1376 } else {
1377 epoch->flags = 0;
1378 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001379 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001380 if (rv == FE_STILL_LIVE)
1381 rv = FE_RECYCLED;
1382 }
1383 }
1384
1385 if (!next_epoch)
1386 break;
1387
1388 epoch = next_epoch;
1389 } while (1);
1390
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001391 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001392
Philipp Reisnerb411b362009-09-25 16:07:19 -07001393 return rv;
1394}
1395
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001396static enum write_ordering_e
1397max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
1398{
1399 struct disk_conf *dc;
1400
1401 dc = rcu_dereference(bdev->disk_conf);
1402
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001403 if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1404 wo = WO_DRAIN_IO;
1405 if (wo == WO_DRAIN_IO && !dc->disk_drain)
1406 wo = WO_NONE;
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001407
1408 return wo;
1409}
1410
Philipp Reisnerb411b362009-09-25 16:07:19 -07001411/**
1412 * drbd_bump_write_ordering() - Fall back to an other write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001413 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001414 * @wo: Write ordering method to try.
1415 */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001416void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
1417 enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001418{
Philipp Reisnere9526582013-11-22 15:53:41 +01001419 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001420 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001421 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001422 static char *write_ordering_str[] = {
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001423 [WO_NONE] = "none",
1424 [WO_DRAIN_IO] = "drain",
1425 [WO_BDEV_FLUSH] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001426 };
1427
Philipp Reisnere9526582013-11-22 15:53:41 +01001428 pwo = resource->write_ordering;
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001429 if (wo != WO_BDEV_FLUSH)
Lars Ellenberg70df7092013-12-20 11:17:02 +01001430 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001431 rcu_read_lock();
Philipp Reisnere9526582013-11-22 15:53:41 +01001432 idr_for_each_entry(&resource->devices, device, vnr) {
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001433 if (get_ldev(device)) {
1434 wo = max_allowed_wo(device->ldev, wo);
1435 if (device->ldev == bdev)
1436 bdev = NULL;
1437 put_ldev(device);
1438 }
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001439 }
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001440
1441 if (bdev)
1442 wo = max_allowed_wo(bdev, wo);
1443
Lars Ellenberg70df7092013-12-20 11:17:02 +01001444 rcu_read_unlock();
1445
Philipp Reisnere9526582013-11-22 15:53:41 +01001446 resource->write_ordering = wo;
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001447 if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
Philipp Reisnere9526582013-11-22 15:53:41 +01001448 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001449}
1450
Lars Ellenberg9104d312016-06-14 00:26:31 +02001451static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02001452{
Christoph Hellwig0dbed962017-04-05 19:21:21 +02001453 struct block_device *bdev = device->ldev->backing_bdev;
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02001454
Christoph Hellwig0dbed962017-04-05 19:21:21 +02001455 if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9,
1456 GFP_NOIO, 0))
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02001457 peer_req->flags |= EE_WAS_ERROR;
Christoph Hellwig0dbed962017-04-05 19:21:21 +02001458
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02001459 drbd_endio_write_sec_final(peer_req);
1460}
1461
Lars Ellenberg9104d312016-06-14 00:26:31 +02001462static void drbd_issue_peer_wsame(struct drbd_device *device,
1463 struct drbd_peer_request *peer_req)
1464{
1465 struct block_device *bdev = device->ldev->backing_bdev;
1466 sector_t s = peer_req->i.sector;
1467 sector_t nr = peer_req->i.size >> 9;
1468 if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages))
1469 peer_req->flags |= EE_WAS_ERROR;
1470 drbd_endio_write_sec_final(peer_req);
1471}
1472
1473
Philipp Reisnerb411b362009-09-25 16:07:19 -07001474/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001475 * drbd_submit_peer_request()
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001476 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001477 * @peer_req: peer request
Jens Axboe1eff9d32016-08-05 15:35:16 -06001478 * @rw: flag field, see bio->bi_opf
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001479 *
1480 * May spread the pages to multiple bios,
1481 * depending on bio_add_page restrictions.
1482 *
1483 * Returns 0 if all bios have been submitted,
1484 * -ENOMEM if we could not allocate enough bios,
1485 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1486 * single page to an empty bio (which should never happen and likely indicates
1487 * that the lower level IO stack is in some way broken). This has been observed
1488 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001489 */
1490/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001491int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001492 struct drbd_peer_request *peer_req,
Mike Christiebb3cc852016-06-05 14:32:06 -05001493 const unsigned op, const unsigned op_flags,
1494 const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001495{
1496 struct bio *bios = NULL;
1497 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001498 struct page *page = peer_req->pages;
1499 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001500 unsigned data_size = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001501 unsigned n_bios = 0;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001502 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001503 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001504
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02001505 /* TRIM/DISCARD: for now, always use the helper function
1506 * blkdev_issue_zeroout(..., discard=true).
1507 * It's synchronous, but it does the right thing wrt. bio splitting.
1508 * Correctness first, performance later. Next step is to code an
1509 * asynchronous variant of the same.
1510 */
Lars Ellenberg9104d312016-06-14 00:26:31 +02001511 if (peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001512 /* wait for all pending IO completions, before we start
1513 * zeroing things out. */
Andreas Gruenbacher5dd2ca12014-08-11 16:59:23 +02001514 conn_wait_active_ee_empty(peer_req->peer_device->connection);
Lars Ellenberg45d29332014-04-23 12:25:23 +02001515 /* add it to the active list now,
1516 * so we can find it to present it in debugfs */
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02001517 peer_req->submit_jif = jiffies;
1518 peer_req->flags |= EE_SUBMITTED;
Philipp Reisner700ca8c2016-06-14 00:26:13 +02001519
1520 /* If this was a resync request from receive_rs_deallocated(),
1521 * it is already on the sync_ee list */
1522 if (list_empty(&peer_req->w.list)) {
1523 spin_lock_irq(&device->resource->req_lock);
1524 list_add_tail(&peer_req->w.list, &device->active_ee);
1525 spin_unlock_irq(&device->resource->req_lock);
1526 }
1527
Lars Ellenberg9104d312016-06-14 00:26:31 +02001528 if (peer_req->flags & EE_IS_TRIM)
1529 drbd_issue_peer_discard(device, peer_req);
1530 else /* EE_WRITE_SAME */
1531 drbd_issue_peer_wsame(device, peer_req);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001532 return 0;
1533 }
1534
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001535 /* In most cases, we will only need one bio. But in case the lower
1536 * level restrictions happen to be different at this offset on this
1537 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001538 * request in more than one bio.
1539 *
1540 * Plain bio_alloc is good enough here, this is no DRBD internally
1541 * generated bio, but a bio allocated on behalf of the peer.
1542 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001543next_bio:
1544 bio = bio_alloc(GFP_NOIO, nr_pages);
1545 if (!bio) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001546 drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001547 goto fail;
1548 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001549 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001550 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001551 bio->bi_bdev = device->ldev->backing_bdev;
Mike Christiebb3cc852016-06-05 14:32:06 -05001552 bio_set_op_attrs(bio, op, op_flags);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001553 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001554 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001555
1556 bio->bi_next = bios;
1557 bios = bio;
1558 ++n_bios;
1559
1560 page_chain_for_each(page) {
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001561 unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
Ming Lei06efffd2016-11-11 20:05:30 +08001562 if (!bio_add_page(bio, page, len, 0))
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001563 goto next_bio;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001564 data_size -= len;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001565 sector += len >> 9;
1566 --nr_pages;
1567 }
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001568 D_ASSERT(device, data_size == 0);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001569 D_ASSERT(device, page == NULL);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001570
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001571 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02001572 /* for debugfs: update timestamp, mark as submitted */
1573 peer_req->submit_jif = jiffies;
1574 peer_req->flags |= EE_SUBMITTED;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001575 do {
1576 bio = bios;
1577 bios = bios->bi_next;
1578 bio->bi_next = NULL;
1579
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001580 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001581 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001582 return 0;
1583
1584fail:
1585 while (bios) {
1586 bio = bios;
1587 bios = bios->bi_next;
1588 bio_put(bio);
1589 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001590 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001591}
1592
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001593static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001594 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001595{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001596 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001597
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001598 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001599 drbd_clear_interval(i);
1600
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001601 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001602 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001603 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001604}
1605
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001606static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001607{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001608 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001609 int vnr;
1610
1611 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001612 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1613 struct drbd_device *device = peer_device->device;
1614
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001615 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001616 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001617 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001618 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001619 rcu_read_lock();
1620 }
1621 rcu_read_unlock();
1622}
1623
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001624static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001625{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001626 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001627 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628 struct drbd_epoch *epoch;
1629
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001630 /* FIXME these are unacked on connection,
1631 * not a specific (peer)device.
1632 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001633 connection->current_epoch->barrier_nr = p->barrier;
1634 connection->current_epoch->connection = connection;
1635 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636
1637 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1638 * the activity log, which means it would not be resynced in case the
1639 * R_PRIMARY crashes now.
1640 * Therefore we must send the barrier_ack after the barrier request was
1641 * completed. */
Philipp Reisnere9526582013-11-22 15:53:41 +01001642 switch (connection->resource->write_ordering) {
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001643 case WO_NONE:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001644 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001645 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001646
1647 /* receiver context, in the writeout path of the other node.
1648 * avoid potential distributed deadlock */
1649 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1650 if (epoch)
1651 break;
1652 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001653 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001654 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001655
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001656 case WO_BDEV_FLUSH:
1657 case WO_DRAIN_IO:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001658 conn_wait_active_ee_empty(connection);
1659 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001660
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001661 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001662 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1663 if (epoch)
1664 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001665 }
1666
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001667 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001668 default:
Philipp Reisnere9526582013-11-22 15:53:41 +01001669 drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
1670 connection->resource->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001671 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001672 }
1673
1674 epoch->flags = 0;
1675 atomic_set(&epoch->epoch_size, 0);
1676 atomic_set(&epoch->active, 0);
1677
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001678 spin_lock(&connection->epoch_lock);
1679 if (atomic_read(&connection->current_epoch->epoch_size)) {
1680 list_add(&epoch->list, &connection->current_epoch->list);
1681 connection->current_epoch = epoch;
1682 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001683 } else {
1684 /* The current_epoch got recycled while we allocated this one... */
1685 kfree(epoch);
1686 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001687 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001688
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001689 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690}
1691
Lars Ellenberg9104d312016-06-14 00:26:31 +02001692/* quick wrapper in case payload size != request_size (write same) */
1693static void drbd_csum_ee_size(struct crypto_ahash *h,
1694 struct drbd_peer_request *r, void *d,
1695 unsigned int payload_size)
1696{
1697 unsigned int tmp = r->i.size;
1698 r->i.size = payload_size;
1699 drbd_csum_ee(h, r, d);
1700 r->i.size = tmp;
1701}
1702
Philipp Reisnerb411b362009-09-25 16:07:19 -07001703/* used from receive_RSDataReply (recv_resync_read)
Lars Ellenberg9104d312016-06-14 00:26:31 +02001704 * and from receive_Data.
1705 * data_size: actual payload ("data in")
1706 * for normal writes that is bi_size.
1707 * for discards, that is zero.
1708 * for write same, it is logical_block_size.
1709 * both trim and write same have the bi_size ("data len to be affected")
1710 * as extra argument in the packet header.
1711 */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001712static struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001713read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001714 struct packet_info *pi) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001715{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001716 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001717 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001718 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001719 struct page *page;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001720 int digest_size, err;
1721 unsigned int data_size = pi->size, ds;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001722 void *dig_in = peer_device->connection->int_dig_in;
1723 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001724 unsigned long *data;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001725 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
Lars Ellenberg9104d312016-06-14 00:26:31 +02001726 struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001727
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001728 digest_size = 0;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001729 if (!trim && peer_device->connection->peer_integrity_tfm) {
Herbert Xu9534d672016-01-24 21:19:21 +08001730 digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001731 /*
1732 * FIXME: Receive the incoming digest into the receive buffer
1733 * here, together with its struct p_data?
1734 */
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001735 err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001736 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001737 return NULL;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001738 data_size -= digest_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001739 }
1740
Lars Ellenberg9104d312016-06-14 00:26:31 +02001741 /* assume request_size == data_size, but special case trim and wsame. */
1742 ds = data_size;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001743 if (trim) {
Lars Ellenberg9104d312016-06-14 00:26:31 +02001744 if (!expect(data_size == 0))
1745 return NULL;
1746 ds = be32_to_cpu(trim->size);
1747 } else if (wsame) {
1748 if (data_size != queue_logical_block_size(device->rq_queue)) {
1749 drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
1750 data_size, queue_logical_block_size(device->rq_queue));
1751 return NULL;
1752 }
1753 if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
1754 drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
1755 data_size, bdev_logical_block_size(device->ldev->backing_bdev));
1756 return NULL;
1757 }
1758 ds = be32_to_cpu(wsame->size);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001759 }
1760
Lars Ellenberg9104d312016-06-14 00:26:31 +02001761 if (!expect(IS_ALIGNED(ds, 512)))
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001762 return NULL;
Lars Ellenberg9104d312016-06-14 00:26:31 +02001763 if (trim || wsame) {
1764 if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
1765 return NULL;
1766 } else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001767 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001768
Lars Ellenberg66660322010-04-06 12:15:04 +02001769 /* even though we trust out peer,
1770 * we sometimes have to double check. */
Lars Ellenberg9104d312016-06-14 00:26:31 +02001771 if (sector + (ds>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001772 drbd_err(device, "request from peer beyond end of local disk: "
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001773 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001774 (unsigned long long)capacity,
Lars Ellenberg9104d312016-06-14 00:26:31 +02001775 (unsigned long long)sector, ds);
Lars Ellenberg66660322010-04-06 12:15:04 +02001776 return NULL;
1777 }
1778
Philipp Reisnerb411b362009-09-25 16:07:19 -07001779 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1780 * "criss-cross" setup, that might cause write-out on some other DRBD,
1781 * which in turn might block on the other node at this very place. */
Lars Ellenberg9104d312016-06-14 00:26:31 +02001782 peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001783 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001785
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02001786 peer_req->flags |= EE_WRITE;
Lars Ellenberg9104d312016-06-14 00:26:31 +02001787 if (trim) {
1788 peer_req->flags |= EE_IS_TRIM;
Lars Ellenberg81a35372012-07-30 09:00:54 +02001789 return peer_req;
Lars Ellenberg9104d312016-06-14 00:26:31 +02001790 }
1791 if (wsame)
1792 peer_req->flags |= EE_WRITE_SAME;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001793
Lars Ellenberg9104d312016-06-14 00:26:31 +02001794 /* receive payload size bytes into page chain */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001795 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001796 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001797 page_chain_for_each(page) {
1798 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001799 data = kmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001800 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001801 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001802 drbd_err(device, "Fault injection: Corrupting data on receive\n");
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001803 data[0] = data[0] ^ (unsigned long)-1;
1804 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001805 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001806 if (err) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001807 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808 return NULL;
1809 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001810 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001811 }
1812
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001813 if (digest_size) {
Lars Ellenberg9104d312016-06-14 00:26:31 +02001814 drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001815 if (memcmp(dig_in, dig_vv, digest_size)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001816 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
Lars Ellenberg470be442010-11-10 10:36:52 +01001817 (unsigned long long)sector, data_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001818 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001819 return NULL;
1820 }
1821 }
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001822 device->recv_cnt += data_size >> 9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001823 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001824}
1825
1826/* drbd_drain_block() just takes a data block
1827 * out of the socket input buffer, and discards it.
1828 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001829static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001830{
1831 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001832 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001833 void *data;
1834
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001835 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001836 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001837
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001838 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001839
1840 data = kmap(page);
1841 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001842 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1843
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001844 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001845 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001846 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001847 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001848 }
1849 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001850 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001851 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001852}
1853
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001854static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855 sector_t sector, int data_size)
1856{
Kent Overstreet79886132013-11-23 17:19:00 -08001857 struct bio_vec bvec;
1858 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001859 struct bio *bio;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001860 int digest_size, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001861 void *dig_in = peer_device->connection->int_dig_in;
1862 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001863
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001864 digest_size = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001865 if (peer_device->connection->peer_integrity_tfm) {
Herbert Xu9534d672016-01-24 21:19:21 +08001866 digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001867 err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001868 if (err)
1869 return err;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001870 data_size -= digest_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001871 }
1872
Philipp Reisnerb411b362009-09-25 16:07:19 -07001873 /* optimistically update recv_cnt. if receiving fails below,
1874 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001875 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001876
1877 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001878 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001879
Kent Overstreet79886132013-11-23 17:19:00 -08001880 bio_for_each_segment(bvec, bio, iter) {
1881 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1882 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001883 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001884 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001885 if (err)
1886 return err;
1887 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888 }
1889
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001890 if (digest_size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001891 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001892 if (memcmp(dig_in, dig_vv, digest_size)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001893 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001894 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001895 }
1896 }
1897
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001898 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001899 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001900}
1901
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001902/*
Philipp Reisner668700b2015-03-16 16:08:29 +01001903 * e_end_resync_block() is called in ack_sender context via
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001904 * drbd_finish_peer_reqs().
1905 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001906static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001907{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001908 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001909 container_of(w, struct drbd_peer_request, w);
1910 struct drbd_peer_device *peer_device = peer_req->peer_device;
1911 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001912 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001913 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001914
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001915 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001916
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001917 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001918 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001919 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001920 } else {
1921 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001922 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001923
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001924 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001925 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001926 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001927
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001928 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001929}
1930
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001931static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001932 struct packet_info *pi) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001933{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001934 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001935 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001936
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001937 peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001938 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001939 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001940
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001941 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001942
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001943 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001944 /* corresponding dec_unacked() in e_end_resync_block()
1945 * respective _drbd_clear_done_ee */
1946
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001947 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02001948 peer_req->submit_jif = jiffies;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001949
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001950 spin_lock_irq(&device->resource->req_lock);
Lars Ellenbergb9ed7082014-04-23 12:15:35 +02001951 list_add_tail(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001952 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001953
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001954 atomic_add(pi->size >> 9, &device->rs_sect_ev);
Mike Christiebb3cc852016-06-05 14:32:06 -05001955 if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
1956 DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001957 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001958
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001959 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001960 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001961 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001962 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001963 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001964
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001965 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001966fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001967 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001968 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001969}
1970
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001971static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001972find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001973 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001974{
1975 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001976
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001977 /* Request object according to our peer */
1978 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001979 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001980 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001981 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001982 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001983 (unsigned long)id, (unsigned long long)sector);
1984 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001985 return NULL;
1986}
1987
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001988static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001989{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001990 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001991 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001992 struct drbd_request *req;
1993 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001994 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001995 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001996
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001997 peer_device = conn_peer_device(connection, pi->vnr);
1998 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001999 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002000 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002001
2002 sector = be64_to_cpu(p->sector);
2003
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002004 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002005 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002006 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01002007 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002008 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002009
Bart Van Assche24c48302011-05-21 18:32:29 +02002010 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07002011 * special casing it there for the various failure cases.
2012 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002013 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002014 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01002015 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002016 /* else: nothing. handled from drbd_disconnect...
2017 * I don't think we may complete this just yet
2018 * in case we are "on-disconnect: freeze" */
2019
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002020 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021}
2022
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002023static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002025 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002026 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002027 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002028 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002029 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002030
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002031 peer_device = conn_peer_device(connection, pi->vnr);
2032 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002033 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002034 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002035
2036 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002037 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002038
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002039 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002040 /* data is submitted to disk within recv_resync_read.
2041 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002042 * or in drbd_peer_request_endio. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002043 err = recv_resync_read(peer_device, sector, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002044 } else {
2045 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002046 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002047
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002048 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002049
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002050 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002051 }
2052
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002053 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02002054
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002055 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002056}
2057
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002058static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002059 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002060{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002061 struct drbd_interval *i;
2062 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002063
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002064 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002065 if (!i->local)
2066 continue;
2067 req = container_of(i, struct drbd_request, i);
2068 if (req->rq_state & RQ_LOCAL_PENDING ||
2069 !(req->rq_state & RQ_POSTPONED))
2070 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01002071 /* as it is RQ_POSTPONED, this will cause it to
2072 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002073 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002074 }
2075}
2076
Andreas Gruenbachera990be42011-04-06 17:56:48 +02002077/*
Philipp Reisner668700b2015-03-16 16:08:29 +01002078 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07002079 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002080static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002081{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01002082 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002083 container_of(w, struct drbd_peer_request, w);
2084 struct drbd_peer_device *peer_device = peer_req->peer_device;
2085 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002086 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002087 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002088
Philipp Reisner303d1442011-04-13 16:24:47 -07002089 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002090 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002091 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
2092 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002093 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07002094 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002095 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002096 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002097 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002098 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002099 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002100 /* we expect it to be marked out of sync anyways...
2101 * maybe assert this? */
2102 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002103 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002104 }
Lars Ellenberg08d0dab2014-03-20 11:19:22 +01002105
Philipp Reisnerb411b362009-09-25 16:07:19 -07002106 /* we delete from the conflict detection hash _after_ we sent out the
2107 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02002108 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002109 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002110 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002111 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002112 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002113 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002114 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01002115 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002116 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002117
Andreas Gruenbacher5dd2ca12014-08-11 16:59:23 +02002118 drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002119
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002120 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002121}
2122
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002123static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002124{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01002125 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002126 container_of(w, struct drbd_peer_request, w);
2127 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002128 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002129
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002130 err = drbd_send_ack(peer_device, ack, peer_req);
2131 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002132
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002133 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002134}
2135
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002136static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002137{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002138 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002139}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002140
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002141static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002142{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002143 struct drbd_peer_request *peer_req =
2144 container_of(w, struct drbd_peer_request, w);
2145 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002146
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002147 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002148 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002149}
2150
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002151static bool seq_greater(u32 a, u32 b)
2152{
2153 /*
2154 * We assume 32-bit wrap-around here.
2155 * For 24-bit wrap-around, we would have to shift:
2156 * a <<= 8; b <<= 8;
2157 */
2158 return (s32)a - (s32)b > 0;
2159}
2160
2161static u32 seq_max(u32 a, u32 b)
2162{
2163 return seq_greater(a, b) ? a : b;
2164}
2165
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002166static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002167{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002168 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01002169 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002170
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002171 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002172 spin_lock(&device->peer_seq_lock);
2173 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2174 device->peer_seq = newest_peer_seq;
2175 spin_unlock(&device->peer_seq_lock);
2176 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01002177 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002178 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002179 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002180}
2181
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002182static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2183{
2184 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2185}
2186
2187/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002188static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002189{
2190 struct drbd_peer_request *rs_req;
Fabian Frederick7e5fec32016-06-14 00:26:35 +02002191 bool rv = false;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002192
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002193 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002194 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002195 if (overlaps(peer_req->i.sector, peer_req->i.size,
2196 rs_req->i.sector, rs_req->i.size)) {
Fabian Frederick7e5fec32016-06-14 00:26:35 +02002197 rv = true;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002198 break;
2199 }
2200 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002201 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002202
2203 return rv;
2204}
2205
Philipp Reisnerb411b362009-09-25 16:07:19 -07002206/* Called from receive_Data.
2207 * Synchronize packets on sock with packets on msock.
2208 *
2209 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2210 * packet traveling on msock, they are still processed in the order they have
2211 * been sent.
2212 *
2213 * Note: we don't care for Ack packets overtaking P_DATA packets.
2214 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002215 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07002216 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002217 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002218 * ourselves. Correctly handles 32bit wrap around.
2219 *
2220 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2221 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2222 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2223 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2224 *
2225 * returns 0 if we may process the packet,
2226 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002227static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002228{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002229 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002230 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002231 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002232 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002233
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002234 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002235 return 0;
2236
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002237 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002238 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002239 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2240 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002241 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002242 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002243
Philipp Reisnerb411b362009-09-25 16:07:19 -07002244 if (signal_pending(current)) {
2245 ret = -ERESTARTSYS;
2246 break;
2247 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002248
2249 rcu_read_lock();
Andreas Gruenbacher5dd2ca12014-08-11 16:59:23 +02002250 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002251 rcu_read_unlock();
2252
2253 if (!tp)
2254 break;
2255
2256 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002257 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2258 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002259 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002260 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002261 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002262 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002263 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002264 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002265 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002266 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002267 break;
2268 }
2269 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002270 spin_unlock(&device->peer_seq_lock);
2271 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002272 return ret;
2273}
2274
Lars Ellenberg688593c2010-11-17 22:25:03 +01002275/* see also bio_flags_to_wire()
2276 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2277 * flags and back. We may replicate to other kernel versions. */
Mike Christiebb3cc852016-06-05 14:32:06 -05002278static unsigned long wire_flags_to_bio_flags(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002279{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002280 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2281 (dpf & DP_FUA ? REQ_FUA : 0) |
Mike Christie28a8f0d2016-06-05 14:32:25 -05002282 (dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
Mike Christiebb3cc852016-06-05 14:32:06 -05002283}
2284
2285static unsigned long wire_flags_to_bio_op(u32 dpf)
2286{
2287 if (dpf & DP_DISCARD)
Christoph Hellwig45c21792017-04-05 19:21:22 +02002288 return REQ_OP_WRITE_ZEROES;
Mike Christiebb3cc852016-06-05 14:32:06 -05002289 else
2290 return REQ_OP_WRITE;
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002291}
2292
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002293static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002294 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002295{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002296 struct drbd_interval *i;
2297
2298 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002299 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002300 struct drbd_request *req;
2301 struct bio_and_error m;
2302
2303 if (!i->local)
2304 continue;
2305 req = container_of(i, struct drbd_request, i);
2306 if (!(req->rq_state & RQ_POSTPONED))
2307 continue;
2308 req->rq_state &= ~RQ_POSTPONED;
2309 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002310 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002311 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002312 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002313 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002314 goto repeat;
2315 }
2316}
2317
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002318static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002319 struct drbd_peer_request *peer_req)
2320{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002321 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002322 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002323 sector_t sector = peer_req->i.sector;
2324 const unsigned int size = peer_req->i.size;
2325 struct drbd_interval *i;
2326 bool equal;
2327 int err;
2328
2329 /*
2330 * Inserting the peer request into the write_requests tree will prevent
2331 * new conflicting local requests from being added.
2332 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002333 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002334
2335 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002336 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002337 if (i == &peer_req->i)
2338 continue;
Lars Ellenberg08d0dab2014-03-20 11:19:22 +01002339 if (i->completed)
2340 continue;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002341
2342 if (!i->local) {
2343 /*
2344 * Our peer has sent a conflicting remote request; this
2345 * should not happen in a two-node setup. Wait for the
2346 * earlier peer request to complete.
2347 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002348 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002349 if (err)
2350 goto out;
2351 goto repeat;
2352 }
2353
2354 equal = i->sector == sector && i->size == size;
2355 if (resolve_conflicts) {
2356 /*
2357 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002358 * overlapping request, it can be considered overwritten
2359 * and thus superseded; otherwise, it will be retried
2360 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002361 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002362 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002363 (i->size >> 9) >= sector + (size >> 9);
2364
2365 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002366 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002367 "local=%llus +%u, remote=%llus +%u, "
2368 "assuming %s came first\n",
2369 (unsigned long long)i->sector, i->size,
2370 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002371 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002372
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002373 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002374 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002375 list_add_tail(&peer_req->w.list, &device->done_ee);
Philipp Reisner668700b2015-03-16 16:08:29 +01002376 queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002377
2378 err = -ENOENT;
2379 goto out;
2380 } else {
2381 struct drbd_request *req =
2382 container_of(i, struct drbd_request, i);
2383
2384 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002385 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002386 "local=%llus +%u, remote=%llus +%u\n",
2387 (unsigned long long)i->sector, i->size,
2388 (unsigned long long)sector, size);
2389
2390 if (req->rq_state & RQ_LOCAL_PENDING ||
2391 !(req->rq_state & RQ_POSTPONED)) {
2392 /*
2393 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002394 * decide if this request has been superseded
2395 * or needs to be retried.
2396 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002397 * disappear from the write_requests tree.
2398 *
2399 * In addition, wait for the conflicting
2400 * request to finish locally before submitting
2401 * the conflicting peer request.
2402 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002403 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002404 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002405 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002406 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002407 goto out;
2408 }
2409 goto repeat;
2410 }
2411 /*
2412 * Remember to restart the conflicting requests after
2413 * the new peer request has completed.
2414 */
2415 peer_req->flags |= EE_RESTART_REQUESTS;
2416 }
2417 }
2418 err = 0;
2419
2420 out:
2421 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002422 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002423 return err;
2424}
2425
Philipp Reisnerb411b362009-09-25 16:07:19 -07002426/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002427static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002428{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002429 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002430 struct drbd_device *device;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002431 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002432 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002433 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002434 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002435 u32 peer_seq = be32_to_cpu(p->seq_num);
Mike Christiebb3cc852016-06-05 14:32:06 -05002436 int op, op_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002437 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002438 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002439
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002440 peer_device = conn_peer_device(connection, pi->vnr);
2441 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002442 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002443 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002444
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002445 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002446 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002447
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002448 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2449 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002450 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002451 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002452 if (!err)
2453 err = err2;
2454 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002455 }
2456
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002457 /*
2458 * Corresponding put_ldev done either below (on various errors), or in
2459 * drbd_peer_request_endio, if we successfully submit the data at the
2460 * end of this function.
2461 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002462
2463 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002464 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002465 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002466 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002467 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002468 }
2469
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002470 peer_req->w.cb = e_end_block;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002471 peer_req->submit_jif = jiffies;
2472 peer_req->flags |= EE_APPLICATION;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002473
Lars Ellenberg688593c2010-11-17 22:25:03 +01002474 dp_flags = be32_to_cpu(p->dp_flags);
Mike Christiebb3cc852016-06-05 14:32:06 -05002475 op = wire_flags_to_bio_op(dp_flags);
2476 op_flags = wire_flags_to_bio_flags(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002477 if (pi->cmd == P_TRIM) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002478 D_ASSERT(peer_device, peer_req->i.size > 0);
Christoph Hellwig45c21792017-04-05 19:21:22 +02002479 D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002480 D_ASSERT(peer_device, peer_req->pages == NULL);
2481 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002482 D_ASSERT(device, peer_req->i.size == 0);
2483 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002484 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002485
2486 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002487 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002488
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002489 spin_lock(&connection->epoch_lock);
2490 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002491 atomic_inc(&peer_req->epoch->epoch_size);
2492 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002493 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002494
Philipp Reisner302bdea2011-04-21 11:36:49 +02002495 rcu_read_lock();
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002496 nc = rcu_dereference(peer_device->connection->net_conf);
2497 tp = nc->two_primaries;
2498 if (peer_device->connection->agreed_pro_version < 100) {
2499 switch (nc->wire_protocol) {
2500 case DRBD_PROT_C:
2501 dp_flags |= DP_SEND_WRITE_ACK;
2502 break;
2503 case DRBD_PROT_B:
2504 dp_flags |= DP_SEND_RECEIVE_ACK;
2505 break;
2506 }
2507 }
Philipp Reisner302bdea2011-04-21 11:36:49 +02002508 rcu_read_unlock();
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002509
2510 if (dp_flags & DP_SEND_WRITE_ACK) {
2511 peer_req->flags |= EE_SEND_WRITE_ACK;
2512 inc_unacked(device);
2513 /* corresponding dec_unacked() in e_end_block()
2514 * respective _drbd_clear_done_ee */
2515 }
2516
2517 if (dp_flags & DP_SEND_RECEIVE_ACK) {
2518 /* I really don't like it that the receiver thread
2519 * sends on the msock, but anyways */
Andreas Gruenbacher5dd2ca12014-08-11 16:59:23 +02002520 drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002521 }
2522
Philipp Reisner302bdea2011-04-21 11:36:49 +02002523 if (tp) {
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002524 /* two primaries implies protocol C */
2525 D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
Philipp Reisner302bdea2011-04-21 11:36:49 +02002526 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002527 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002528 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002529 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002530 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002531 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002532 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002533 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002534 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002535 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002536 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002537 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002538 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002539 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002540 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002541 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002542 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002543 }
Lars Ellenberg9104d312016-06-14 00:26:31 +02002544 /* TRIM and WRITE_SAME are processed synchronously,
2545 * we wait for all pending requests, respectively wait for
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002546 * active_ee to become empty in drbd_submit_peer_request();
2547 * better not add ourselves here. */
Lars Ellenberg9104d312016-06-14 00:26:31 +02002548 if ((peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) == 0)
Lars Ellenbergb9ed7082014-04-23 12:15:35 +02002549 list_add_tail(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002550 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002551
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002552 if (device->state.conn == C_SYNC_TARGET)
2553 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002554
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002555 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002556 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002557 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002558 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01002559 drbd_al_begin_io(device, &peer_req->i);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002560 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002561 }
2562
Mike Christiebb3cc852016-06-05 14:32:06 -05002563 err = drbd_submit_peer_request(device, peer_req, op, op_flags,
2564 DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002565 if (!err)
2566 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002567
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002568 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002569 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002570 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002571 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002572 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002573 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002574 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
2575 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002576 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002577 }
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002578
Philipp Reisnerb411b362009-09-25 16:07:19 -07002579out_interrupted:
Fabian Frederick7e5fec32016-06-14 00:26:35 +02002580 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002581 put_ldev(device);
2582 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002583 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002584}
2585
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002586/* We may throttle resync, if the lower device seems to be busy,
2587 * and current sync rate is above c_min_rate.
2588 *
2589 * To decide whether or not the lower device is busy, we use a scheme similar
2590 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2591 * (more than 64 sectors) of activity we cannot account for with our own resync
2592 * activity, it obviously is "busy".
2593 *
2594 * The current sync rate used here uses only the most recent two step marks,
2595 * to have a short time average so we can react faster.
2596 */
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002597bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2598 bool throttle_if_app_is_waiting)
Lars Ellenberge8299872014-04-28 18:43:19 +02002599{
2600 struct lc_element *tmp;
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002601 bool throttle = drbd_rs_c_min_rate_throttle(device);
Lars Ellenberge8299872014-04-28 18:43:19 +02002602
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002603 if (!throttle || throttle_if_app_is_waiting)
2604 return throttle;
Lars Ellenberge8299872014-04-28 18:43:19 +02002605
2606 spin_lock_irq(&device->al_lock);
2607 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2608 if (tmp) {
2609 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2610 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2611 throttle = false;
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002612 /* Do not slow down if app IO is already waiting for this extent,
2613 * and our progress is necessary for application IO to complete. */
Lars Ellenberge8299872014-04-28 18:43:19 +02002614 }
2615 spin_unlock_irq(&device->al_lock);
2616
2617 return throttle;
2618}
2619
2620bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002621{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002622 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002623 unsigned long db, dt, dbdt;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002624 unsigned int c_min_rate;
Lars Ellenberge8299872014-04-28 18:43:19 +02002625 int curr_events;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002626
2627 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002628 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002629 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002630
2631 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002632 if (c_min_rate == 0)
Lars Ellenberge8299872014-04-28 18:43:19 +02002633 return false;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002634
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002635 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2636 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002637 atomic_read(&device->rs_sect_ev);
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002638
2639 if (atomic_read(&device->ap_actlog_cnt)
Lars Ellenbergff8bd882014-11-10 17:21:12 +01002640 || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002641 unsigned long rs_left;
2642 int i;
2643
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002644 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002645
2646 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2647 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002648 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002649
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002650 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2651 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002652 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002653 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002654
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002655 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002656 if (!dt)
2657 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002658 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002659 dbdt = Bit2KB(db/dt);
2660
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002661 if (dbdt > c_min_rate)
Lars Ellenberge8299872014-04-28 18:43:19 +02002662 return true;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002663 }
Lars Ellenberge8299872014-04-28 18:43:19 +02002664 return false;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002665}
2666
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002667static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002668{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002669 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002670 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002671 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002672 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002673 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002674 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002675 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002676 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002677 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002678
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002679 peer_device = conn_peer_device(connection, pi->vnr);
2680 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002681 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002682 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002683 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002684
2685 sector = be64_to_cpu(p->sector);
2686 size = be32_to_cpu(p->blksize);
2687
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002688 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002689 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002690 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002691 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002692 }
2693 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002694 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002695 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002696 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002697 }
2698
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002699 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002700 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002701 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002702 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002703 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002704 break;
Philipp Reisner700ca8c2016-06-14 00:26:13 +02002705 case P_RS_THIN_REQ:
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002706 case P_RS_DATA_REQUEST:
2707 case P_CSUM_RS_REQUEST:
2708 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002709 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002710 break;
2711 case P_OV_REPLY:
2712 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002713 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002714 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002715 break;
2716 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002717 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002718 }
2719 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002720 drbd_err(device, "Can not satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002721 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002722
Lars Ellenberga821cc42010-09-06 12:31:37 +02002723 /* drain possibly payload */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002724 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002725 }
2726
2727 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2728 * "criss-cross" setup, that might cause write-out on some other DRBD,
2729 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002730 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
Lars Ellenberg9104d312016-06-14 00:26:31 +02002731 size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002732 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002733 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002734 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002735 }
2736
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002737 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002738 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002739 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002740 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002741 /* application IO, don't drbd_rs_begin_io */
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002742 peer_req->flags |= EE_APPLICATION;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002743 goto submit;
2744
Philipp Reisner700ca8c2016-06-14 00:26:13 +02002745 case P_RS_THIN_REQ:
2746 /* If at some point in the future we have a smart way to
2747 find out if this data block is completely deallocated,
2748 then we would do something smarter here than reading
2749 the block... */
2750 peer_req->flags |= EE_RS_THIN_REQ;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002751 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002752 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002753 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002754 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002755 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002756 break;
2757
2758 case P_OV_REPLY:
2759 case P_CSUM_RS_REQUEST:
2760 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002761 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002762 if (!di)
2763 goto out_free_e;
2764
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002765 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002766 di->digest = (((char *)di)+sizeof(struct digest_info));
2767
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002768 peer_req->digest = di;
2769 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002770
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002771 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002772 goto out_free_e;
2773
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002774 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002775 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002776 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002777 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002778 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Lars Ellenbergaaaba342014-03-18 12:30:09 +01002779 /* remember to report stats in drbd_resync_finished */
2780 device->use_csums = true;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002781 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002782 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002783 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002784 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002785 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002786 /* drbd_rs_begin_io done when we sent this request,
2787 * but accounting still needs to be done. */
2788 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002789 }
2790 break;
2791
2792 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002793 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002794 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002795 unsigned long now = jiffies;
2796 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002797 device->ov_start_sector = sector;
2798 device->ov_position = sector;
2799 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2800 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002801 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002802 device->rs_mark_left[i] = device->ov_left;
2803 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002804 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002805 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002806 (unsigned long long)sector);
2807 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002808 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002809 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002810 break;
2811
Philipp Reisnerb411b362009-09-25 16:07:19 -07002812 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002813 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002814 }
2815
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002816 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2817 * wrt the receiver, but it is not as straightforward as it may seem.
2818 * Various places in the resync start and stop logic assume resync
2819 * requests are processed in order, requeuing this on the worker thread
2820 * introduces a bunch of new code for synchronization between threads.
2821 *
2822 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2823 * "forever", throttling after drbd_rs_begin_io will lock that extent
2824 * for application writes for the same time. For now, just throttle
2825 * here, where the rest of the code expects the receiver to sleep for
2826 * a while, anyways.
2827 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002828
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002829 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2830 * this defers syncer requests for some time, before letting at least
2831 * on request through. The resync controller on the receiving side
2832 * will adapt to the incoming rate accordingly.
2833 *
2834 * We cannot throttle here if remote is Primary/SyncTarget:
2835 * we would also throttle its application reads.
2836 * In that case, throttling is done on the SyncTarget only.
2837 */
Lars Ellenbergc5a2c152014-05-08 10:08:05 +02002838
2839 /* Even though this may be a resync request, we do add to "read_ee";
2840 * "sync_ee" is only used for resync WRITEs.
2841 * Add to list early, so debugfs can find this request
2842 * even if we have to sleep below. */
2843 spin_lock_irq(&device->resource->req_lock);
2844 list_add_tail(&peer_req->w.list, &device->read_ee);
2845 spin_unlock_irq(&device->resource->req_lock);
2846
Lars Ellenberg944410e2014-05-06 15:02:05 +02002847 update_receiver_timing_details(connection, drbd_rs_should_slow_down);
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002848 if (device->state.peer != R_PRIMARY
2849 && drbd_rs_should_slow_down(device, sector, false))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002850 schedule_timeout_uninterruptible(HZ/10);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002851 update_receiver_timing_details(connection, drbd_rs_begin_io);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002852 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002853 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002854
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002855submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002856 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002857
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002858submit:
Lars Ellenberg944410e2014-05-06 15:02:05 +02002859 update_receiver_timing_details(connection, drbd_submit_peer_request);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002860 inc_unacked(device);
Mike Christiebb3cc852016-06-05 14:32:06 -05002861 if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
2862 fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002863 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002864
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002865 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002866 drbd_err(device, "submit failed, triggering re-connect\n");
Lars Ellenbergc5a2c152014-05-08 10:08:05 +02002867
2868out_free_e:
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002869 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002870 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002871 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002872 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2873
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002874 put_ldev(device);
2875 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002876 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002877}
2878
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002879/**
2880 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2881 */
2882static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002883{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002884 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002885 int self, peer, rv = -100;
2886 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002887 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002888
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002889 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2890 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002891
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002892 ch_peer = device->p_uuid[UI_SIZE];
2893 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002894
Philipp Reisner44ed1672011-04-19 17:10:19 +02002895 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002896 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002897 rcu_read_unlock();
2898 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899 case ASB_CONSENSUS:
2900 case ASB_DISCARD_SECONDARY:
2901 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002902 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002903 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002904 break;
2905 case ASB_DISCONNECT:
2906 break;
2907 case ASB_DISCARD_YOUNGER_PRI:
2908 if (self == 0 && peer == 1) {
2909 rv = -1;
2910 break;
2911 }
2912 if (self == 1 && peer == 0) {
2913 rv = 1;
2914 break;
2915 }
2916 /* Else fall through to one of the other strategies... */
2917 case ASB_DISCARD_OLDER_PRI:
2918 if (self == 0 && peer == 1) {
2919 rv = 1;
2920 break;
2921 }
2922 if (self == 1 && peer == 0) {
2923 rv = -1;
2924 break;
2925 }
2926 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002927 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002928 "Using discard-least-changes instead\n");
2929 case ASB_DISCARD_ZERO_CHG:
2930 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002931 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002932 ? -1 : 1;
2933 break;
2934 } else {
2935 if (ch_peer == 0) { rv = 1; break; }
2936 if (ch_self == 0) { rv = -1; break; }
2937 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002938 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002939 break;
2940 case ASB_DISCARD_LEAST_CHG:
2941 if (ch_self < ch_peer)
2942 rv = -1;
2943 else if (ch_self > ch_peer)
2944 rv = 1;
2945 else /* ( ch_self == ch_peer ) */
2946 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002947 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002948 ? -1 : 1;
2949 break;
2950 case ASB_DISCARD_LOCAL:
2951 rv = -1;
2952 break;
2953 case ASB_DISCARD_REMOTE:
2954 rv = 1;
2955 }
2956
2957 return rv;
2958}
2959
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002960/**
2961 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2962 */
2963static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002964{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002965 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002966 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002967 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002968
Philipp Reisner44ed1672011-04-19 17:10:19 +02002969 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002970 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002971 rcu_read_unlock();
2972 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002973 case ASB_DISCARD_YOUNGER_PRI:
2974 case ASB_DISCARD_OLDER_PRI:
2975 case ASB_DISCARD_LEAST_CHG:
2976 case ASB_DISCARD_LOCAL:
2977 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002978 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002979 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002980 break;
2981 case ASB_DISCONNECT:
2982 break;
2983 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002984 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002985 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002986 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002987 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002988 rv = hg;
2989 break;
2990 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002991 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002992 break;
2993 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002994 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002995 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002996 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002997 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002998 enum drbd_state_rv rv2;
2999
Philipp Reisnerb411b362009-09-25 16:07:19 -07003000 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3001 * we might be here in C_WF_REPORT_PARAMS which is transient.
3002 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003003 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01003004 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003005 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003006 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003007 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008 rv = hg;
3009 }
3010 } else
3011 rv = hg;
3012 }
3013
3014 return rv;
3015}
3016
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003017/**
3018 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
3019 */
3020static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003021{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003022 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01003023 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003024 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025
Philipp Reisner44ed1672011-04-19 17:10:19 +02003026 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003027 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003028 rcu_read_unlock();
3029 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003030 case ASB_DISCARD_YOUNGER_PRI:
3031 case ASB_DISCARD_OLDER_PRI:
3032 case ASB_DISCARD_LEAST_CHG:
3033 case ASB_DISCARD_LOCAL:
3034 case ASB_DISCARD_REMOTE:
3035 case ASB_CONSENSUS:
3036 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02003037 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003038 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003039 break;
3040 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003041 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003042 break;
3043 case ASB_DISCONNECT:
3044 break;
3045 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003046 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003047 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01003048 enum drbd_state_rv rv2;
3049
Philipp Reisnerb411b362009-09-25 16:07:19 -07003050 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3051 * we might be here in C_WF_REPORT_PARAMS which is transient.
3052 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003053 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01003054 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003055 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003056 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003057 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003058 rv = hg;
3059 }
3060 } else
3061 rv = hg;
3062 }
3063
3064 return rv;
3065}
3066
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003067static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003068 u64 bits, u64 flags)
3069{
3070 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003071 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003072 return;
3073 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003074 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003075 text,
3076 (unsigned long long)uuid[UI_CURRENT],
3077 (unsigned long long)uuid[UI_BITMAP],
3078 (unsigned long long)uuid[UI_HISTORY_START],
3079 (unsigned long long)uuid[UI_HISTORY_END],
3080 (unsigned long long)bits,
3081 (unsigned long long)flags);
3082}
3083
3084/*
3085 100 after split brain try auto recover
3086 2 C_SYNC_SOURCE set BitMap
3087 1 C_SYNC_SOURCE use BitMap
3088 0 no Sync
3089 -1 C_SYNC_TARGET use BitMap
3090 -2 C_SYNC_TARGET set BitMap
3091 -100 after split brain, disconnect
3092-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01003093-1091 requires proto 91
3094-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07003095 */
Lars Ellenbergf2d3d75b2016-06-14 00:26:32 +02003096
3097static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003098{
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003099 struct drbd_peer_device *const peer_device = first_peer_device(device);
3100 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101 u64 self, peer;
3102 int i, j;
3103
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003104 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3105 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003106
3107 *rule_nr = 10;
3108 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
3109 return 0;
3110
3111 *rule_nr = 20;
3112 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
3113 peer != UUID_JUST_CREATED)
3114 return -2;
3115
3116 *rule_nr = 30;
3117 if (self != UUID_JUST_CREATED &&
3118 (peer == UUID_JUST_CREATED || peer == (u64)0))
3119 return 2;
3120
3121 if (self == peer) {
3122 int rct, dc; /* roles at crash time */
3123
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003124 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003125
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003126 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003127 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003128
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003129 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3130 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003131 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003132 drbd_uuid_move_history(device);
3133 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3134 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003135
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003136 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3137 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003138 *rule_nr = 34;
3139 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003140 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003141 *rule_nr = 36;
3142 }
3143
3144 return 1;
3145 }
3146
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003147 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003148
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003149 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003150 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003151
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003152 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3153 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003154 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003155
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003156 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3157 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3158 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003159
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003160 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003161 *rule_nr = 35;
3162 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003163 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003164 *rule_nr = 37;
3165 }
3166
3167 return -1;
3168 }
3169
3170 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003171 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3172 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173 /* lowest bit is set when we were primary,
3174 * next bit (weight 2) is set when peer was primary */
3175 *rule_nr = 40;
3176
Lars Ellenbergf2d3d75b2016-06-14 00:26:32 +02003177 /* Neither has the "crashed primary" flag set,
3178 * only a replication link hickup. */
3179 if (rct == 0)
3180 return 0;
3181
3182 /* Current UUID equal and no bitmap uuid; does not necessarily
3183 * mean this was a "simultaneous hard crash", maybe IO was
3184 * frozen, so no UUID-bump happened.
3185 * This is a protocol change, overload DRBD_FF_WSAME as flag
3186 * for "new-enough" peer DRBD version. */
3187 if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3188 *rule_nr = 41;
3189 if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3190 drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
3191 return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3192 }
3193 if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3194 /* At least one has the "crashed primary" bit set,
3195 * both are primary now, but neither has rotated its UUIDs?
3196 * "Can not happen." */
3197 drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
3198 return -100;
3199 }
3200 if (device->state.role == R_PRIMARY)
3201 return 1;
3202 return -1;
3203 }
3204
3205 /* Both are secondary.
3206 * Really looks like recovery from simultaneous hard crash.
3207 * Check which had been primary before, and arbitrate. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003208 switch (rct) {
Lars Ellenbergf2d3d75b2016-06-14 00:26:32 +02003209 case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003210 case 1: /* self_pri && !peer_pri */ return 1;
3211 case 2: /* !self_pri && peer_pri */ return -1;
3212 case 3: /* self_pri && peer_pri */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003213 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003214 return dc ? -1 : 1;
3215 }
3216 }
3217
3218 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003219 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003220 if (self == peer)
3221 return -1;
3222
3223 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003224 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003225 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003226 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003227 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3228 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3229 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230 /* The last P_SYNC_UUID did not get though. Undo the last start of
3231 resync as sync source modifications of the peer's UUIDs. */
3232
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003233 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003234 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003235
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003236 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3237 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01003238
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003239 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003240 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01003241
Philipp Reisnerb411b362009-09-25 16:07:19 -07003242 return -1;
3243 }
3244 }
3245
3246 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003247 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003248 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003249 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003250 if (self == peer)
3251 return -2;
3252 }
3253
3254 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003255 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3256 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003257 if (self == peer)
3258 return 1;
3259
3260 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003261 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003263 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003264 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3265 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3266 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003267 /* The last P_SYNC_UUID did not get though. Undo the last start of
3268 resync as sync source modifications of our UUIDs. */
3269
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003270 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003271 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003272
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003273 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3274 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003275
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003276 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003277 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3278 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003279
3280 return 1;
3281 }
3282 }
3283
3284
3285 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003286 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003287 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003288 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003289 if (self == peer)
3290 return 2;
3291 }
3292
3293 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003294 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3295 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296 if (self == peer && self != ((u64)0))
3297 return 100;
3298
3299 *rule_nr = 100;
3300 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003301 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003302 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003303 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003304 if (self == peer)
3305 return -100;
3306 }
3307 }
3308
3309 return -1000;
3310}
3311
3312/* drbd_sync_handshake() returns the new conn state on success, or
3313 CONN_MASK (-1) on failure.
3314 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003315static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3316 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003317 enum drbd_disk_state peer_disk) __must_hold(local)
3318{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003319 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003320 enum drbd_conns rv = C_MASK;
3321 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003322 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003323 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003324
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003325 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003326 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003327 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003328
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003329 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02003330
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003331 spin_lock_irq(&device->ldev->md.uuid_lock);
3332 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3333 drbd_uuid_dump(device, "peer", device->p_uuid,
3334 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003335
Lars Ellenbergf2d3d75b2016-06-14 00:26:32 +02003336 hg = drbd_uuid_compare(device, peer_role, &rule_nr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003337 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003338
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003339 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003340
3341 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003342 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003343 return C_MASK;
3344 }
Lars Ellenbergf2d3d75b2016-06-14 00:26:32 +02003345 if (hg < -0x10000) {
3346 int proto, fflags;
3347 hg = -hg;
3348 proto = hg & 0xff;
3349 fflags = (hg >> 8) & 0xff;
3350 drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
3351 proto, fflags);
3352 return C_MASK;
3353 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003354 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003355 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003356 return C_MASK;
3357 }
3358
3359 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3360 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3361 int f = (hg == -100) || abs(hg) == 2;
3362 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3363 if (f)
3364 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003365 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003366 hg > 0 ? "source" : "target");
3367 }
3368
Adam Gandelman3a11a482010-04-08 16:48:23 -07003369 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003370 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003371
Philipp Reisner44ed1672011-04-19 17:10:19 +02003372 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003373 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003374
3375 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003376 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003377 + (peer_role == R_PRIMARY);
3378 int forced = (hg == -100);
3379
3380 switch (pcount) {
3381 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003382 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003383 break;
3384 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003385 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003386 break;
3387 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003388 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003389 break;
3390 }
3391 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003392 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003393 "automatically solved. Sync from %s node\n",
3394 pcount, (hg < 0) ? "peer" : "this");
3395 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003396 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003397 " UUIDs where ambiguous.\n");
3398 hg = hg*2;
3399 }
3400 }
3401 }
3402
3403 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003404 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003405 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003406 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003407 hg = 1;
3408
3409 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003410 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003411 "Sync from %s node\n",
3412 (hg < 0) ? "peer" : "this");
3413 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003414 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003415 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003416 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003417
3418 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003419 /* FIXME this log message is not correct if we end up here
3420 * after an attempted attach on a diskless node.
3421 * We just refuse to attach -- well, we drop the "connection"
3422 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003423 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003424 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003425 return C_MASK;
3426 }
3427
3428 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003429 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003430 return C_MASK;
3431 }
3432
3433 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003434 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003435 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003436 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003437 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003438 /* fall through */
3439 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003440 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003441 return C_MASK;
3442 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003443 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003444 "assumption\n");
3445 }
3446 }
3447
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003448 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003449 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003450 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003451 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003452 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003453 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3454 abs(hg) >= 2 ? "full" : "bit-map based");
3455 return C_MASK;
3456 }
3457
Philipp Reisnerb411b362009-09-25 16:07:19 -07003458 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003459 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003460 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003461 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003462 return C_MASK;
3463 }
3464
3465 if (hg > 0) { /* become sync source. */
3466 rv = C_WF_BITMAP_S;
3467 } else if (hg < 0) { /* become sync target */
3468 rv = C_WF_BITMAP_T;
3469 } else {
3470 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003471 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003472 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003473 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003474 }
3475 }
3476
3477 return rv;
3478}
3479
Philipp Reisnerf179d762011-05-16 17:31:47 +02003480static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003481{
3482 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003483 if (peer == ASB_DISCARD_REMOTE)
3484 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003485
3486 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003487 if (peer == ASB_DISCARD_LOCAL)
3488 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003489
3490 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003491 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003492}
3493
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003494static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003495{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003496 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003497 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3498 int p_proto, p_discard_my_data, p_two_primaries, cf;
3499 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3500 char integrity_alg[SHARED_SECRET_MAX] = "";
Herbert Xu9534d672016-01-24 21:19:21 +08003501 struct crypto_ahash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003502 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003503
Philipp Reisnerb411b362009-09-25 16:07:19 -07003504 p_proto = be32_to_cpu(p->protocol);
3505 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3506 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3507 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003508 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003509 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003510 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003511
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003512 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003513 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003514
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003515 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003516 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003517 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003518 if (err)
3519 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003520 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003521 }
3522
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003523 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003524 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003525
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003526 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003527 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003528
3529 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003530 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003531
3532 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003533 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003534 goto disconnect_rcu_unlock;
3535 }
3536
3537 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003538 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003539 goto disconnect_rcu_unlock;
3540 }
3541
3542 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003543 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003544 goto disconnect_rcu_unlock;
3545 }
3546
3547 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003548 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003549 goto disconnect_rcu_unlock;
3550 }
3551
3552 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003553 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003554 goto disconnect_rcu_unlock;
3555 }
3556
3557 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003558 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003559 goto disconnect_rcu_unlock;
3560 }
3561
3562 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003563 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003564 goto disconnect_rcu_unlock;
3565 }
3566
3567 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003568 }
3569
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003570 if (integrity_alg[0]) {
3571 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003572
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003573 /*
3574 * We can only change the peer data integrity algorithm
3575 * here. Changing our own data integrity algorithm
3576 * requires that we send a P_PROTOCOL_UPDATE packet at
3577 * the same time; otherwise, the peer has no way to
3578 * tell between which packets the algorithm should
3579 * change.
3580 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003581
Herbert Xu9534d672016-01-24 21:19:21 +08003582 peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
Lars Ellenberg1b57e662016-06-14 00:26:39 +02003583 if (IS_ERR(peer_integrity_tfm)) {
3584 peer_integrity_tfm = NULL;
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003585 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003586 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003587 goto disconnect;
3588 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003589
Herbert Xu9534d672016-01-24 21:19:21 +08003590 hash_size = crypto_ahash_digestsize(peer_integrity_tfm);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003591 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3592 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3593 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003594 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003595 goto disconnect;
3596 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003597 }
3598
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003599 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3600 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003601 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003602 goto disconnect;
3603 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003604
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003605 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003606 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003607 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003608 *new_net_conf = *old_net_conf;
3609
3610 new_net_conf->wire_protocol = p_proto;
3611 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3612 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3613 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3614 new_net_conf->two_primaries = p_two_primaries;
3615
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003616 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003617 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003618 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003619
Herbert Xu9534d672016-01-24 21:19:21 +08003620 crypto_free_ahash(connection->peer_integrity_tfm);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003621 kfree(connection->int_dig_in);
3622 kfree(connection->int_dig_vv);
3623 connection->peer_integrity_tfm = peer_integrity_tfm;
3624 connection->int_dig_in = int_dig_in;
3625 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003626
3627 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003628 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003629 integrity_alg[0] ? integrity_alg : "(none)");
3630
3631 synchronize_rcu();
3632 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003633 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003634
Philipp Reisner44ed1672011-04-19 17:10:19 +02003635disconnect_rcu_unlock:
3636 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003637disconnect:
Herbert Xu9534d672016-01-24 21:19:21 +08003638 crypto_free_ahash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003639 kfree(int_dig_in);
3640 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003641 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003642 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003643}
3644
3645/* helper function
3646 * input: alg name, feature name
3647 * return: NULL (alg name was "")
3648 * ERR_PTR(error) if something goes wrong
3649 * or the crypto hash ptr, if it worked out ok. */
Herbert Xu9534d672016-01-24 21:19:21 +08003650static struct crypto_ahash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003651 const char *alg, const char *name)
3652{
Herbert Xu9534d672016-01-24 21:19:21 +08003653 struct crypto_ahash *tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003654
3655 if (!alg[0])
3656 return NULL;
3657
Herbert Xu9534d672016-01-24 21:19:21 +08003658 tfm = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003659 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003660 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661 alg, name, PTR_ERR(tfm));
3662 return tfm;
3663 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003664 return tfm;
3665}
3666
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003667static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003668{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003669 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003670 int size = pi->size;
3671
3672 while (size) {
3673 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003674 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003675 if (s <= 0) {
3676 if (s < 0)
3677 return s;
3678 break;
3679 }
3680 size -= s;
3681 }
3682 if (size)
3683 return -EIO;
3684 return 0;
3685}
3686
3687/*
3688 * config_unknown_volume - device configuration command for unknown volume
3689 *
3690 * When a device is added to an existing connection, the node on which the
3691 * device is added first will send configuration commands to its peer but the
3692 * peer will not know about the device yet. It will warn and ignore these
3693 * commands. Once the device is added on the second node, the second node will
3694 * send the same device configuration commands, but in the other direction.
3695 *
3696 * (We can also end up here if drbd is misconfigured.)
3697 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003698static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003699{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003700 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003701 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003702 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003703}
3704
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003705static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003706{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003707 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003708 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003709 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003710 unsigned int header_size, data_size, exp_max_sz;
Herbert Xu9534d672016-01-24 21:19:21 +08003711 struct crypto_ahash *verify_tfm = NULL;
3712 struct crypto_ahash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003713 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003714 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003715 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003716 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003717 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003718 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003719
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003720 peer_device = conn_peer_device(connection, pi->vnr);
3721 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003722 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003723 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003724
3725 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3726 : apv == 88 ? sizeof(struct p_rs_param)
3727 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003728 : apv <= 94 ? sizeof(struct p_rs_param_89)
3729 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003730
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003731 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003732 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003733 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003734 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003735 }
3736
3737 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003738 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003739 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003740 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003741 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003742 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003743 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003744 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003745 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003746 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003747 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003748 }
3749
3750 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003751 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003752 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3753
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003754 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003755 if (err)
3756 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003757
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003758 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003759 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003760 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003761 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3762 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003763 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003764 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003765 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003766 return -ENOMEM;
3767 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003768
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003769 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003770 *new_disk_conf = *old_disk_conf;
3771
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003772 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003773 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003774
3775 if (apv >= 88) {
3776 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003777 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003778 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003779 "peer wants %u, accepting only up to %u byte\n",
3780 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003781 err = -EIO;
3782 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003783 }
3784
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003785 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003786 if (err)
3787 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003788 /* we expect NUL terminated string */
3789 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003790 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003791 p->verify_alg[data_size-1] = 0;
3792
3793 } else /* apv >= 89 */ {
3794 /* we still expect NUL terminated strings */
3795 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003796 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3797 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003798 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3799 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3800 }
3801
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003802 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003803 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003804 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003805 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003806 goto disconnect;
3807 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003808 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003809 p->verify_alg, "verify-alg");
3810 if (IS_ERR(verify_tfm)) {
3811 verify_tfm = NULL;
3812 goto disconnect;
3813 }
3814 }
3815
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003816 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003817 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003818 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003819 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003820 goto disconnect;
3821 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003822 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003823 p->csums_alg, "csums-alg");
3824 if (IS_ERR(csums_tfm)) {
3825 csums_tfm = NULL;
3826 goto disconnect;
3827 }
3828 }
3829
Philipp Reisner813472c2011-05-03 16:47:02 +02003830 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003831 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3832 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3833 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3834 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003835
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003836 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003837 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003838 new_plan = fifo_alloc(fifo_size);
3839 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003840 drbd_err(device, "kmalloc of fifo_buffer failed");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003841 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003842 goto disconnect;
3843 }
3844 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003845 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003846
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003847 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003848 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3849 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003850 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003851 goto disconnect;
3852 }
3853
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003854 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003855
3856 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003857 strcpy(new_net_conf->verify_alg, p->verify_alg);
3858 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Herbert Xu9534d672016-01-24 21:19:21 +08003859 crypto_free_ahash(peer_device->connection->verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003860 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003861 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003862 }
3863 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003864 strcpy(new_net_conf->csums_alg, p->csums_alg);
3865 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Herbert Xu9534d672016-01-24 21:19:21 +08003866 crypto_free_ahash(peer_device->connection->csums_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003867 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003868 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003869 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003870 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003871 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003872 }
3873
Philipp Reisner813472c2011-05-03 16:47:02 +02003874 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003875 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3876 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003877 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003878
3879 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003880 old_plan = device->rs_plan_s;
3881 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003882 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003883
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003884 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003885 synchronize_rcu();
3886 if (new_net_conf)
3887 kfree(old_net_conf);
3888 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003889 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003890
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003891 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003892
Philipp Reisner813472c2011-05-03 16:47:02 +02003893reconnect:
3894 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003895 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003896 kfree(new_disk_conf);
3897 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003898 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003899 return -EIO;
3900
Philipp Reisnerb411b362009-09-25 16:07:19 -07003901disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003902 kfree(new_plan);
3903 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003904 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003905 kfree(new_disk_conf);
3906 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003907 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003908 /* just for completeness: actually not needed,
3909 * as this is not reached if csums_tfm was ok. */
Herbert Xu9534d672016-01-24 21:19:21 +08003910 crypto_free_ahash(csums_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003911 /* but free the verify_tfm again, if csums_tfm did not work out */
Herbert Xu9534d672016-01-24 21:19:21 +08003912 crypto_free_ahash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003913 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003914 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915}
3916
Philipp Reisnerb411b362009-09-25 16:07:19 -07003917/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003918static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003919 const char *s, sector_t a, sector_t b)
3920{
3921 sector_t d;
3922 if (a == 0 || b == 0)
3923 return;
3924 d = (a > b) ? (a - b) : (b - a);
3925 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003926 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003927 (unsigned long long)a, (unsigned long long)b);
3928}
3929
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003930static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003931{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003932 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003933 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003934 struct p_sizes *p = pi->data;
Lars Ellenberg9104d312016-06-14 00:26:31 +02003935 struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003936 enum determine_dev_size dd = DS_UNCHANGED;
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003937 sector_t p_size, p_usize, p_csize, my_usize;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003938 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003939 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003940
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003941 peer_device = conn_peer_device(connection, pi->vnr);
3942 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003943 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003944 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003945
Philipp Reisnerb411b362009-09-25 16:07:19 -07003946 p_size = be64_to_cpu(p->d_size);
3947 p_usize = be64_to_cpu(p->u_size);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003948 p_csize = be64_to_cpu(p->c_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003949
Philipp Reisnerb411b362009-09-25 16:07:19 -07003950 /* just store the peer's disk size for now.
3951 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003952 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003953
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003954 if (get_ldev(device)) {
Lars Ellenberg60bac042016-06-14 00:26:30 +02003955 sector_t new_size, cur_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003956 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003957 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003958 rcu_read_unlock();
3959
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003960 warn_if_differ_considerably(device, "lower level device sizes",
3961 p_size, drbd_get_max_capacity(device->ldev));
3962 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003963 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003964
3965 /* if this is the first connect, or an otherwise expected
3966 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003967 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003968 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003969
3970 /* Never shrink a device with usable data during connect.
3971 But allow online shrinking if we are connected. */
Lars Ellenberg60bac042016-06-14 00:26:30 +02003972 new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
3973 cur_size = drbd_get_capacity(device->this_bdev);
3974 if (new_size < cur_size &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003975 device->state.disk >= D_OUTDATED &&
3976 device->state.conn < C_CONNECTED) {
Lars Ellenberg60bac042016-06-14 00:26:30 +02003977 drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
3978 (unsigned long long)new_size, (unsigned long long)cur_size);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003979 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003980 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003981 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003982 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003983
3984 if (my_usize != p_usize) {
3985 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3986
3987 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3988 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003989 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003990 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003991 return -ENOMEM;
3992 }
3993
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003994 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003995 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003996 *new_disk_conf = *old_disk_conf;
3997 new_disk_conf->disk_size = p_usize;
3998
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003999 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004000 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004001 synchronize_rcu();
4002 kfree(old_disk_conf);
4003
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004004 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004005 (unsigned long)my_usize);
4006 }
4007
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004008 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004009 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004010
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004011 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02004012 /* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004013 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02004014 drbd_reconsider_queue_parameters(), we can be sure that after
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004015 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
4016
Philipp Reisnere89b5912010-03-24 17:11:33 +01004017 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004018 if (get_ldev(device)) {
Lars Ellenberg9104d312016-06-14 00:26:31 +02004019 drbd_reconsider_queue_parameters(device, device->ldev, o);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004020 dd = drbd_determine_dev_size(device, ddsf, NULL);
4021 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02004022 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004023 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004024 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004025 } else {
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01004026 /*
4027 * I am diskless, need to accept the peer's *current* size.
4028 * I must NOT accept the peers backing disk size,
4029 * it may have been larger than mine all along...
4030 *
4031 * At this point, the peer knows more about my disk, or at
4032 * least about what we last agreed upon, than myself.
4033 * So if his c_size is less than his d_size, the most likely
4034 * reason is that *my* d_size was smaller last time we checked.
4035 *
4036 * However, if he sends a zero current size,
4037 * take his (user-capped or) backing disk size anyways.
4038 */
Lars Ellenberg9104d312016-06-14 00:26:31 +02004039 drbd_reconsider_queue_parameters(device, NULL, o);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01004040 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004041 }
4042
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004043 if (get_ldev(device)) {
4044 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
4045 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004046 ldsc = 1;
4047 }
4048
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004049 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004050 }
4051
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004052 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004053 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004054 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004055 /* we have different sizes, probably peer
4056 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004057 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004058 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004059 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
4060 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
4061 if (device->state.pdsk >= D_INCONSISTENT &&
4062 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01004063 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004064 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01004065 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004066 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01004067 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004068 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004069 }
4070 }
4071
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004072 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004073}
4074
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004075static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004076{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004077 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004078 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004079 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004080 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004081 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004082
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004083 peer_device = conn_peer_device(connection, pi->vnr);
4084 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004085 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004086 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004087
Philipp Reisnerb411b362009-09-25 16:07:19 -07004088 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08004089 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004090 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08004091 return false;
4092 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004093
4094 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4095 p_uuid[i] = be64_to_cpu(p->uuid[i]);
4096
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004097 kfree(device->p_uuid);
4098 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004099
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004100 if (device->state.conn < C_CONNECTED &&
4101 device->state.disk < D_INCONSISTENT &&
4102 device->state.role == R_PRIMARY &&
4103 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004104 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004105 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004106 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004107 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004108 }
4109
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004110 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004111 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004112 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004113 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004114 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004115 (p_uuid[UI_FLAGS] & 8);
4116 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004117 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004118 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004119 "clear_n_write from receive_uuids",
4120 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004121 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4122 _drbd_uuid_set(device, UI_BITMAP, 0);
4123 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004124 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004125 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004126 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004127 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004128 put_ldev(device);
4129 } else if (device->state.disk < D_INCONSISTENT &&
4130 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02004131 /* I am a diskless primary, the peer just created a new current UUID
4132 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004133 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004134 }
4135
4136 /* Before we test for the disk state, we should wait until an eventually
4137 ongoing cluster wide state change is finished. That is important if
4138 we are primary and are detaching from our disk. We need to see the
4139 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004140 mutex_lock(device->state_mutex);
4141 mutex_unlock(device->state_mutex);
4142 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4143 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004144
4145 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004146 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004147
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004148 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004149}
4150
4151/**
4152 * convert_state() - Converts the peer's view of the cluster state to our point of view
4153 * @ps: The state as seen by the peer.
4154 */
4155static union drbd_state convert_state(union drbd_state ps)
4156{
4157 union drbd_state ms;
4158
4159 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02004160 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004161 [C_CONNECTED] = C_CONNECTED,
4162
4163 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4164 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4165 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4166 [C_VERIFY_S] = C_VERIFY_T,
4167 [C_MASK] = C_MASK,
4168 };
4169
4170 ms.i = ps.i;
4171
4172 ms.conn = c_tab[ps.conn];
4173 ms.peer = ps.role;
4174 ms.role = ps.peer;
4175 ms.pdsk = ps.disk;
4176 ms.disk = ps.pdsk;
4177 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4178
4179 return ms;
4180}
4181
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004182static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004183{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004184 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004185 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004186 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004187 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01004188 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004189
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004190 peer_device = conn_peer_device(connection, pi->vnr);
4191 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004192 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004193 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004194
Philipp Reisnerb411b362009-09-25 16:07:19 -07004195 mask.i = be32_to_cpu(p->mask);
4196 val.i = be32_to_cpu(p->val);
4197
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004198 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004199 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004200 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004201 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004202 }
4203
4204 mask = convert_state(mask);
4205 val = convert_state(val);
4206
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004207 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004208 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004209
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004210 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004211
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004212 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004213}
4214
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004215static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004216{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004217 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004218 union drbd_state mask, val;
4219 enum drbd_state_rv rv;
4220
4221 mask.i = be32_to_cpu(p->mask);
4222 val.i = be32_to_cpu(p->val);
4223
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004224 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4225 mutex_is_locked(&connection->cstate_mutex)) {
4226 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004227 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004228 }
4229
4230 mask = convert_state(mask);
4231 val = convert_state(val);
4232
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004233 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4234 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004235
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004236 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004237}
4238
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004239static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004240{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004241 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004242 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004243 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004244 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004245 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02004246 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004247 int rv;
4248
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004249 peer_device = conn_peer_device(connection, pi->vnr);
4250 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004251 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004252 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004253
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254 peer_state.i = be32_to_cpu(p->state);
4255
4256 real_peer_disk = peer_state.disk;
4257 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004258 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004259 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004260 }
4261
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004262 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004263 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004264 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004265 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004266
Philipp Reisner668700b2015-03-16 16:08:29 +01004267 /* If some other part of the code (ack_receiver thread, timeout)
Lars Ellenberg545752d2011-12-05 14:39:25 +01004268 * already decided to close the connection again,
4269 * we must not "re-establish" it here. */
4270 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02004271 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01004272
Lars Ellenberg40424e42011-09-26 15:24:56 +02004273 /* If this is the "end of sync" confirmation, usually the peer disk
4274 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
4275 * set) resync started in PausedSyncT, or if the timing of pause-/
4276 * unpause-sync events has been "just right", the peer disk may
4277 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
4278 */
4279 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
4280 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004281 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
4282 /* If we are (becoming) SyncSource, but peer is still in sync
4283 * preparation, ignore its uptodate-ness to avoid flapping, it
4284 * will change to inconsistent once the peer reaches active
4285 * syncing states.
4286 * It may have changed syncer-paused flags, however, so we
4287 * cannot ignore this completely. */
4288 if (peer_state.conn > C_CONNECTED &&
4289 peer_state.conn < C_SYNC_SOURCE)
4290 real_peer_disk = D_INCONSISTENT;
4291
4292 /* if peer_state changes to connected at the same time,
4293 * it explicitly notifies us that it finished resync.
4294 * Maybe we should finish it up, too? */
4295 else if (os.conn >= C_SYNC_SOURCE &&
4296 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004297 if (drbd_bm_total_weight(device) <= device->rs_failed)
4298 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004299 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004300 }
4301 }
4302
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004303 /* explicit verify finished notification, stop sector reached. */
4304 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
4305 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004306 ov_out_of_sync_print(device);
4307 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02004308 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004309 }
4310
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004311 /* peer says his disk is inconsistent, while we think it is uptodate,
4312 * and this happens while the peer still thinks we have a sync going on,
4313 * but we think we are already done with the sync.
4314 * We ignore this to avoid flapping pdsk.
4315 * This should not happen, if the peer is a recent version of drbd. */
4316 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4317 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4318 real_peer_disk = D_UP_TO_DATE;
4319
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004320 if (ns.conn == C_WF_REPORT_PARAMS)
4321 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322
Philipp Reisner67531712010-10-27 12:21:30 +02004323 if (peer_state.conn == C_AHEAD)
4324 ns.conn = C_BEHIND;
4325
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004326 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4327 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004328 int cr; /* consider resync */
4329
4330 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004331 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004332 /* if we had an established connection
4333 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004334 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004335 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004336 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004337 /* if we have both been inconsistent, and the peer has been
4338 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004339 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004340 /* if we had been plain connected, and the admin requested to
4341 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004342 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004343 (peer_state.conn >= C_STARTING_SYNC_S &&
4344 peer_state.conn <= C_WF_BITMAP_T));
4345
4346 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004347 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004349 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004350 if (ns.conn == C_MASK) {
4351 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004352 if (device->state.disk == D_NEGOTIATING) {
4353 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004354 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004355 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01004357 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004358 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004359 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004360 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004361 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004362 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004363 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364 }
4365 }
4366 }
4367
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004368 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004369 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004370 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004371 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372 ns.peer = peer_state.role;
4373 ns.pdsk = real_peer_disk;
4374 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004375 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004376 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004377 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004378 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4379 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004380 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004381 for temporal network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004382 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004383 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004384 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004385 drbd_uuid_new_current(device);
4386 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004387 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004388 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004389 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004390 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4391 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004392 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004393
4394 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004395 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004396 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004397 }
4398
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004399 if (os.conn > C_WF_REPORT_PARAMS) {
4400 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004401 peer_state.disk != D_NEGOTIATING ) {
4402 /* we want resync, peer has not yet decided to sync... */
4403 /* Nowadays only used when forcing a node into primary role and
4404 setting its disk to UpToDate with that */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004405 drbd_send_uuids(peer_device);
4406 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004407 }
4408 }
4409
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004410 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004411
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004412 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004413
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004414 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415}
4416
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004417static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004419 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004420 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004421 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004422
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004423 peer_device = conn_peer_device(connection, pi->vnr);
4424 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004425 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004426 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004427
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004428 wait_event(device->misc_wait,
4429 device->state.conn == C_WF_SYNC_UUID ||
4430 device->state.conn == C_BEHIND ||
4431 device->state.conn < C_CONNECTED ||
4432 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004433
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004434 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004435
Philipp Reisnerb411b362009-09-25 16:07:19 -07004436 /* Here the _drbd_uuid_ functions are right, current should
4437 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004438 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4439 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4440 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004441
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004442 drbd_print_uuids(device, "updated sync uuid");
4443 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004444
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004445 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004446 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004447 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004448
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004449 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004450}
4451
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004452/**
4453 * receive_bitmap_plain
4454 *
4455 * Return 0 when done, 1 when another iteration is needed, and a negative error
4456 * code upon failure.
4457 */
4458static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004459receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004460 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004461{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004462 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004463 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004464 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004465 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004466 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004467 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004468
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004469 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004470 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004471 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004472 }
4473 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004474 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004475 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004476 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004477 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004478
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004479 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004480
4481 c->word_offset += num_words;
4482 c->bit_offset = c->word_offset * BITS_PER_LONG;
4483 if (c->bit_offset > c->bm_bits)
4484 c->bit_offset = c->bm_bits;
4485
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004486 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004487}
4488
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004489static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4490{
4491 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4492}
4493
4494static int dcbp_get_start(struct p_compressed_bm *p)
4495{
4496 return (p->encoding & 0x80) != 0;
4497}
4498
4499static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4500{
4501 return (p->encoding >> 4) & 0x7;
4502}
4503
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004504/**
4505 * recv_bm_rle_bits
4506 *
4507 * Return 0 when done, 1 when another iteration is needed, and a negative error
4508 * code upon failure.
4509 */
4510static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004511recv_bm_rle_bits(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004512 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004513 struct bm_xfer_ctx *c,
4514 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004515{
4516 struct bitstream bs;
4517 u64 look_ahead;
4518 u64 rl;
4519 u64 tmp;
4520 unsigned long s = c->bit_offset;
4521 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004522 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004523 int have;
4524 int bits;
4525
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004526 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004527
4528 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4529 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004530 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004531
4532 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4533 bits = vli_decode_bits(&rl, look_ahead);
4534 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004535 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004536
4537 if (toggle) {
4538 e = s + rl -1;
4539 if (e >= c->bm_bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004540 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004541 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004542 }
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004543 _drbd_bm_set_bits(peer_device->device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004544 }
4545
4546 if (have < bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004547 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004548 have, bits, look_ahead,
4549 (unsigned int)(bs.cur.b - p->code),
4550 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004551 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004552 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004553 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4554 if (likely(bits < 64))
4555 look_ahead >>= bits;
4556 else
4557 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004558 have -= bits;
4559
4560 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4561 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004562 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004563 look_ahead |= tmp << have;
4564 have += bits;
4565 }
4566
4567 c->bit_offset = s;
4568 bm_xfer_ctx_bit_to_word_offset(c);
4569
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004570 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004571}
4572
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004573/**
4574 * decode_bitmap_c
4575 *
4576 * Return 0 when done, 1 when another iteration is needed, and a negative error
4577 * code upon failure.
4578 */
4579static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004580decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004581 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004582 struct bm_xfer_ctx *c,
4583 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004584{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004585 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004586 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587
4588 /* other variants had been implemented for evaluation,
4589 * but have been dropped as this one turned out to be "best"
4590 * during all our tests. */
4591
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004592 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4593 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004594 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004595}
4596
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004597void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004598 const char *direction, struct bm_xfer_ctx *c)
4599{
4600 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004601 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004602 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4603 unsigned int plain =
4604 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4605 c->bm_words * sizeof(unsigned long);
4606 unsigned int total = c->bytes[0] + c->bytes[1];
4607 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004608
4609 /* total can not be zero. but just in case: */
4610 if (total == 0)
4611 return;
4612
4613 /* don't report if not compressed */
4614 if (total >= plain)
4615 return;
4616
4617 /* total < plain. check for overflow, still */
4618 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4619 : (1000 * total / plain);
4620
4621 if (r > 1000)
4622 r = 1000;
4623
4624 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004625 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004626 "total %u; compression: %u.%u%%\n",
4627 direction,
4628 c->bytes[1], c->packets[1],
4629 c->bytes[0], c->packets[0],
4630 total, r/10, r % 10);
4631}
4632
4633/* Since we are processing the bitfield from lower addresses to higher,
4634 it does not matter if the process it in 32 bit chunks or 64 bit
4635 chunks as long as it is little endian. (Understand it as byte stream,
4636 beginning with the lowest byte...) If we would use big endian
4637 we would need to process it from the highest address to the lowest,
4638 in order to be agnostic to the 32 vs 64 bits issue.
4639
4640 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004641static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004642{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004643 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004644 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004645 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004646 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004647
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004648 peer_device = conn_peer_device(connection, pi->vnr);
4649 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004650 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004651 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004652
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004653 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004654 /* you are supposed to send additional out-of-sync information
4655 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004656
Philipp Reisnerb411b362009-09-25 16:07:19 -07004657 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004658 .bm_bits = drbd_bm_bits(device),
4659 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004660 };
4661
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004662 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004663 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004664 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004665 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004666 /* MAYBE: sanity check that we speak proto >= 90,
4667 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004668 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004669
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004670 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004671 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004672 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004673 goto out;
4674 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004675 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004676 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004677 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004678 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004679 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004680 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004681 if (err)
4682 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004683 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004684 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004685 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004686 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004687 goto out;
4688 }
4689
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004690 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004691 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004693 if (err <= 0) {
4694 if (err < 0)
4695 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004696 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004697 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004698 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004699 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004700 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004701 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004702
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004703 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004704
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004705 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004706 enum drbd_state_rv rv;
4707
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004708 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004709 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004710 goto out;
4711 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004712 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004713 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004714 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004715 /* admin may have requested C_DISCONNECTING,
4716 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004717 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004718 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004719 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004720 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004721
Philipp Reisnerb411b362009-09-25 16:07:19 -07004722 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004723 drbd_bm_unlock(device);
4724 if (!err && device->state.conn == C_WF_BITMAP_S)
4725 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004726 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004727}
4728
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004729static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004730{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004731 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004732 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004733
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004734 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735}
4736
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004737static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004738{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004739 /* Make sure we've acked all the TCP data associated
4740 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004741 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004742
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004743 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004744}
4745
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004746static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004747{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004748 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004749 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004750 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004751
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004752 peer_device = conn_peer_device(connection, pi->vnr);
4753 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004754 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004755 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004756
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004757 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004758 case C_WF_SYNC_UUID:
4759 case C_WF_BITMAP_T:
4760 case C_BEHIND:
4761 break;
4762 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004763 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004764 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004765 }
4766
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004767 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004768
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004769 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004770}
4771
Philipp Reisner700ca8c2016-06-14 00:26:13 +02004772static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
4773{
4774 struct drbd_peer_device *peer_device;
4775 struct p_block_desc *p = pi->data;
4776 struct drbd_device *device;
4777 sector_t sector;
4778 int size, err = 0;
4779
4780 peer_device = conn_peer_device(connection, pi->vnr);
4781 if (!peer_device)
4782 return -EIO;
4783 device = peer_device->device;
4784
4785 sector = be64_to_cpu(p->sector);
4786 size = be32_to_cpu(p->blksize);
4787
4788 dec_rs_pending(device);
4789
4790 if (get_ldev(device)) {
4791 struct drbd_peer_request *peer_req;
Christoph Hellwig45c21792017-04-05 19:21:22 +02004792 const int op = REQ_OP_WRITE_ZEROES;
Philipp Reisner700ca8c2016-06-14 00:26:13 +02004793
4794 peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
Lars Ellenberg9104d312016-06-14 00:26:31 +02004795 size, 0, GFP_NOIO);
Philipp Reisner700ca8c2016-06-14 00:26:13 +02004796 if (!peer_req) {
4797 put_ldev(device);
4798 return -ENOMEM;
4799 }
4800
4801 peer_req->w.cb = e_end_resync_block;
4802 peer_req->submit_jif = jiffies;
4803 peer_req->flags |= EE_IS_TRIM;
4804
4805 spin_lock_irq(&device->resource->req_lock);
4806 list_add_tail(&peer_req->w.list, &device->sync_ee);
4807 spin_unlock_irq(&device->resource->req_lock);
4808
4809 atomic_add(pi->size >> 9, &device->rs_sect_ev);
4810 err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);
4811
4812 if (err) {
4813 spin_lock_irq(&device->resource->req_lock);
4814 list_del(&peer_req->w.list);
4815 spin_unlock_irq(&device->resource->req_lock);
4816
4817 drbd_free_peer_req(device, peer_req);
4818 put_ldev(device);
4819 err = 0;
4820 goto fail;
4821 }
4822
4823 inc_unacked(device);
4824
4825 /* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
4826 as well as drbd_rs_complete_io() */
4827 } else {
4828 fail:
4829 drbd_rs_complete_io(device, sector);
4830 drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
4831 }
4832
4833 atomic_add(size >> 9, &device->rs_sect_in);
4834
4835 return err;
4836}
4837
Philipp Reisner02918be2010-08-20 14:35:10 +02004838struct data_cmd {
4839 int expect_payload;
Lars Ellenberg9104d312016-06-14 00:26:31 +02004840 unsigned int pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004841 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004842};
4843
Philipp Reisner02918be2010-08-20 14:35:10 +02004844static struct data_cmd drbd_cmd_handler[] = {
4845 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4846 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4847 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4848 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004849 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4850 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4851 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004852 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4853 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004854 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4855 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004856 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4857 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4858 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4859 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4860 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4861 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4862 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4863 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4864 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner700ca8c2016-06-14 00:26:13 +02004865 [P_RS_THIN_REQ] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner02918be2010-08-20 14:35:10 +02004866 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004867 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004868 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004869 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02004870 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
Philipp Reisner700ca8c2016-06-14 00:26:13 +02004871 [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
Lars Ellenberg9104d312016-06-14 00:26:31 +02004872 [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data },
Philipp Reisner02918be2010-08-20 14:35:10 +02004873};
4874
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004875static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004876{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004877 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004878 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004879 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004880
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004881 while (get_t_state(&connection->receiver) == RUNNING) {
Lars Ellenberg9104d312016-06-14 00:26:31 +02004882 struct data_cmd const *cmd;
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004883
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004884 drbd_thread_current_set_cpu(&connection->receiver);
Lars Ellenberg944410e2014-05-06 15:02:05 +02004885 update_receiver_timing_details(connection, drbd_recv_header);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004886 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004887 goto err_out;
4888
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004889 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004890 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004891 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004892 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004893 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004894 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004895
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004896 shs = cmd->pkt_size;
Lars Ellenberg9104d312016-06-14 00:26:31 +02004897 if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
4898 shs += sizeof(struct o_qlim);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004899 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004900 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004901 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004902 goto err_out;
4903 }
Lars Ellenberg9104d312016-06-14 00:26:31 +02004904 if (pi.size < shs) {
4905 drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
4906 cmdname(pi.cmd), (int)shs, pi.size);
4907 goto err_out;
4908 }
Philipp Reisner02918be2010-08-20 14:35:10 +02004909
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004910 if (shs) {
Lars Ellenberg944410e2014-05-06 15:02:05 +02004911 update_receiver_timing_details(connection, drbd_recv_all_warn);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004912 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004913 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004914 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004915 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004916 }
4917
Lars Ellenberg944410e2014-05-06 15:02:05 +02004918 update_receiver_timing_details(connection, cmd->fn);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004919 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004920 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004921 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004922 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004923 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004924 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004925 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004926 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004927
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004928 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004929 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004930}
4931
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004932static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004933{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004934 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004935 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004936 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004937
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004938 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004939 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004940
Lars Ellenberg545752d2011-12-05 14:39:25 +01004941 /* We are about to start the cleanup after connection loss.
4942 * Make sure drbd_make_request knows about that.
4943 * Usually we should be in some network failure state already,
4944 * but just in case we are not, we fix it up here.
4945 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004946 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004947
Philipp Reisner668700b2015-03-16 16:08:29 +01004948 /* ack_receiver does not clean up anything. it must not interfere, either */
Philipp Reisner1c03e522015-03-16 15:01:00 +01004949 drbd_thread_stop(&connection->ack_receiver);
Philipp Reisner668700b2015-03-16 16:08:29 +01004950 if (connection->ack_sender) {
4951 destroy_workqueue(connection->ack_sender);
4952 connection->ack_sender = NULL;
4953 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004954 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004955
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004956 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004957 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4958 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004959 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004960 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004961 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004962 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004963 rcu_read_lock();
4964 }
4965 rcu_read_unlock();
4966
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004967 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004968 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004969 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004970 atomic_set(&connection->current_epoch->epoch_size, 0);
4971 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004972
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004973 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004974
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004975 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4976 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004977
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004978 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004979 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004980 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004981 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004982
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004983 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004984
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004985 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004986 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004987}
4988
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004989static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004990{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004991 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01004992 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004993
Philipp Reisner85719572010-07-21 10:20:17 +02004994 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004995 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004996 _drbd_wait_ee_list_empty(device, &device->active_ee);
4997 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4998 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004999 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005000
5001 /* We do not have data structures that would allow us to
5002 * get the rs_pending_cnt down to 0 again.
5003 * * On C_SYNC_TARGET we do not have any data structures describing
5004 * the pending RSDataRequest's we have sent.
5005 * * On C_SYNC_SOURCE there is no data structure that tracks
5006 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
5007 * And no, it is not the sum of the reference counts in the
5008 * resync_LRU. The resync_LRU tracks the whole operation including
5009 * the disk-IO, while the rs_pending_cnt only tracks the blocks
5010 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005011 drbd_rs_cancel_all(device);
5012 device->rs_total = 0;
5013 device->rs_failed = 0;
5014 atomic_set(&device->rs_pending_cnt, 0);
5015 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005016
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005017 del_timer_sync(&device->resync_timer);
5018 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005019
Philipp Reisnerb411b362009-09-25 16:07:19 -07005020 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
5021 * w_make_resync_request etc. which may still be on the worker queue
5022 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02005023 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005024
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005025 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005026
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01005027 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
5028 might have issued a work again. The one before drbd_finish_peer_reqs() is
5029 necessary to reclain net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02005030 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01005031
Lars Ellenberg08332d72012-08-17 15:09:13 +02005032 /* need to do it again, drbd_finish_peer_reqs() may have populated it
5033 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005034 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005035
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005036 kfree(device->p_uuid);
5037 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005038
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005039 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005040 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005041
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005042 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005043
Lars Ellenbergbe115b62016-06-14 00:26:11 +02005044 if (get_ldev(device)) {
5045 drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
5046 "write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
5047 put_ldev(device);
5048 }
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01005049
Philipp Reisnerb411b362009-09-25 16:07:19 -07005050 /* tcp_close and release of sendpage pages can be deferred. I don't
5051 * want to use SO_LINGER, because apparently it can be deferred for
5052 * more than 20 seconds (longest time I checked).
5053 *
5054 * Actually we don't care for exactly when the network stack does its
5055 * put_page(), but release our reference on these pages right here.
5056 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005057 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005058 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005059 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005060 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02005061 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005062 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005063 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005064 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005065 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005066
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02005067 D_ASSERT(device, list_empty(&device->read_ee));
5068 D_ASSERT(device, list_empty(&device->active_ee));
5069 D_ASSERT(device, list_empty(&device->sync_ee));
5070 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005071
Philipp Reisner360cc742011-02-08 14:29:53 +01005072 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005073}
5074
5075/*
5076 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
5077 * we can agree on is stored in agreed_pro_version.
5078 *
5079 * feature flags and the reserved array should be enough room for future
5080 * enhancements of the handshake protocol, and possible plugins...
5081 *
5082 * for now, they are expected to be zero, but ignored.
5083 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005084static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005085{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005086 struct drbd_socket *sock;
5087 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005088
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005089 sock = &connection->data;
5090 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005091 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01005092 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005093 memset(p, 0, sizeof(*p));
5094 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5095 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02005096 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005097 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005098}
5099
5100/*
5101 * return values:
5102 * 1 yes, we have a valid connection
5103 * 0 oops, did not work out, please try again
5104 * -1 peer talks different language,
5105 * no point in trying again, please go standalone.
5106 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005107static int drbd_do_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005108{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005109 /* ASSERT current == connection->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005110 struct p_connection_features *p;
5111 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01005112 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01005113 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005114
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005115 err = drbd_send_features(connection);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01005116 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005117 return 0;
5118
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005119 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01005120 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005121 return 0;
5122
Andreas Gruenbacher60381782011-03-28 17:05:50 +02005123 if (pi.cmd != P_CONNECTION_FEATURES) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005124 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005125 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005126 return -1;
5127 }
5128
Philipp Reisner77351055b2011-02-07 17:24:26 +01005129 if (pi.size != expect) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005130 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005131 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005132 return -1;
5133 }
5134
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005135 p = pi.data;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005136 err = drbd_recv_all_warn(connection, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01005137 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005138 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005139
Philipp Reisnerb411b362009-09-25 16:07:19 -07005140 p->protocol_min = be32_to_cpu(p->protocol_min);
5141 p->protocol_max = be32_to_cpu(p->protocol_max);
5142 if (p->protocol_max == 0)
5143 p->protocol_max = p->protocol_min;
5144
5145 if (PRO_VERSION_MAX < p->protocol_min ||
5146 PRO_VERSION_MIN > p->protocol_max)
5147 goto incompat;
5148
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005149 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02005150 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005151
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005152 drbd_info(connection, "Handshake successful: "
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005153 "Agreed network protocol version %d\n", connection->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005154
Lars Ellenberg9104d312016-06-14 00:26:31 +02005155 drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s.\n",
5156 connection->agreed_features,
5157 connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
5158 connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
5159 connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" :
5160 connection->agreed_features ? "" : " none");
Philipp Reisner92d94ae2016-06-14 00:26:15 +02005161
Philipp Reisnerb411b362009-09-25 16:07:19 -07005162 return 1;
5163
5164 incompat:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005165 drbd_err(connection, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07005166 "I support %d-%d, peer supports %d-%d\n",
5167 PRO_VERSION_MIN, PRO_VERSION_MAX,
5168 p->protocol_min, p->protocol_max);
5169 return -1;
5170}
5171
5172#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005173static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005174{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005175 drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
5176 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005177 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005178}
5179#else
5180#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01005181
5182/* Return value:
5183 1 - auth succeeded,
5184 0 - failed, try again (network error),
5185 -1 - auth failed, don't try again.
5186*/
5187
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005188static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005189{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005190 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005191 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07005192 char *response = NULL;
5193 char *right_response = NULL;
5194 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005195 unsigned int key_len;
5196 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07005197 unsigned int resp_size;
Herbert Xu9534d672016-01-24 21:19:21 +08005198 SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm);
Philipp Reisner77351055b2011-02-07 17:24:26 +01005199 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005200 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01005201 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005202
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005203 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
5204
Philipp Reisner44ed1672011-04-19 17:10:19 +02005205 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005206 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005207 key_len = strlen(nc->shared_secret);
5208 memcpy(secret, nc->shared_secret, key_len);
5209 rcu_read_unlock();
5210
Herbert Xu9534d672016-01-24 21:19:21 +08005211 desc->tfm = connection->cram_hmac_tfm;
5212 desc->flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005213
Herbert Xu9534d672016-01-24 21:19:21 +08005214 rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005215 if (rv) {
Herbert Xu9534d672016-01-24 21:19:21 +08005216 drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01005217 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005218 goto fail;
5219 }
5220
5221 get_random_bytes(my_challenge, CHALLENGE_LEN);
5222
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005223 sock = &connection->data;
5224 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005225 rv = 0;
5226 goto fail;
5227 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005228 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005229 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005230 if (!rv)
5231 goto fail;
5232
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005233 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01005234 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005235 rv = 0;
5236 goto fail;
5237 }
5238
Philipp Reisner77351055b2011-02-07 17:24:26 +01005239 if (pi.cmd != P_AUTH_CHALLENGE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005240 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005241 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005242 rv = 0;
5243 goto fail;
5244 }
5245
Philipp Reisner77351055b2011-02-07 17:24:26 +01005246 if (pi.size > CHALLENGE_LEN * 2) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005247 drbd_err(connection, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005248 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005249 goto fail;
5250 }
5251
Philipp Reisner67cca282014-04-28 18:43:30 +02005252 if (pi.size < CHALLENGE_LEN) {
5253 drbd_err(connection, "AuthChallenge payload too small.\n");
5254 rv = -1;
5255 goto fail;
5256 }
5257
Philipp Reisner77351055b2011-02-07 17:24:26 +01005258 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005259 if (peers_ch == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005260 drbd_err(connection, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005261 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005262 goto fail;
5263 }
5264
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005265 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01005266 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005267 rv = 0;
5268 goto fail;
5269 }
5270
Philipp Reisner67cca282014-04-28 18:43:30 +02005271 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
5272 drbd_err(connection, "Peer presented the same challenge!\n");
5273 rv = -1;
5274 goto fail;
5275 }
5276
Herbert Xu9534d672016-01-24 21:19:21 +08005277 resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005278 response = kmalloc(resp_size, GFP_NOIO);
5279 if (response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005280 drbd_err(connection, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005281 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005282 goto fail;
5283 }
5284
Herbert Xu9534d672016-01-24 21:19:21 +08005285 rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005286 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005287 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01005288 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005289 goto fail;
5290 }
5291
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005292 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005293 rv = 0;
5294 goto fail;
5295 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005296 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005297 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005298 if (!rv)
5299 goto fail;
5300
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005301 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01005302 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005303 rv = 0;
5304 goto fail;
5305 }
5306
Philipp Reisner77351055b2011-02-07 17:24:26 +01005307 if (pi.cmd != P_AUTH_RESPONSE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005308 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005309 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005310 rv = 0;
5311 goto fail;
5312 }
5313
Philipp Reisner77351055b2011-02-07 17:24:26 +01005314 if (pi.size != resp_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005315 drbd_err(connection, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005316 rv = 0;
5317 goto fail;
5318 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005319
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005320 err = drbd_recv_all_warn(connection, response , resp_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01005321 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005322 rv = 0;
5323 goto fail;
5324 }
5325
5326 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01005327 if (right_response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005328 drbd_err(connection, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005329 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005330 goto fail;
5331 }
5332
Herbert Xu9534d672016-01-24 21:19:21 +08005333 rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
5334 right_response);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005335 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005336 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01005337 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005338 goto fail;
5339 }
5340
5341 rv = !memcmp(response, right_response, resp_size);
5342
5343 if (rv)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005344 drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
Philipp Reisner44ed1672011-04-19 17:10:19 +02005345 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01005346 else
5347 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005348
5349 fail:
5350 kfree(peers_ch);
5351 kfree(response);
5352 kfree(right_response);
Herbert Xu9534d672016-01-24 21:19:21 +08005353 shash_desc_zero(desc);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005354
5355 return rv;
5356}
5357#endif
5358
Andreas Gruenbacher8fe60552011-07-22 11:04:36 +02005359int drbd_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005360{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005361 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005362 int h;
5363
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005364 drbd_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005365
5366 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005367 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005368 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005369 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01005370 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005371 }
5372 if (h == -1) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005373 drbd_warn(connection, "Discarding network configuration.\n");
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005374 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005375 }
5376 } while (h == 0);
5377
Philipp Reisner91fd4da2011-04-20 17:47:29 +02005378 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005379 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005380
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005381 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005382
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005383 drbd_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005384 return 0;
5385}
5386
5387/* ********* acknowledge sender ******** */
5388
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005389static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005390{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005391 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005392 int retcode = be32_to_cpu(p->retcode);
5393
5394 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005395 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005396 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005397 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005398 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005399 drbd_set_st_err_str(retcode), retcode);
5400 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005401 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005402
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005403 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005404}
5405
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005406static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005407{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005408 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005409 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005410 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005411 int retcode = be32_to_cpu(p->retcode);
5412
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005413 peer_device = conn_peer_device(connection, pi->vnr);
5414 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005415 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005416 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005417
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005418 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02005419 D_ASSERT(device, connection->agreed_pro_version < 100);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005420 return got_conn_RqSReply(connection, pi);
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01005421 }
5422
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005423 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005424 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005425 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005426 set_bit(CL_ST_CHG_FAIL, &device->flags);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005427 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005428 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005429 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005430 wake_up(&device->state_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005431
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005432 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005433}
5434
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005435static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005436{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005437 return drbd_send_ping_ack(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005438
5439}
5440
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005441static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005442{
5443 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005444 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5445 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5446 wake_up(&connection->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005447
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005448 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005449}
5450
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005451static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005452{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005453 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005454 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005455 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005456 sector_t sector = be64_to_cpu(p->sector);
5457 int blksize = be32_to_cpu(p->blksize);
5458
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005459 peer_device = conn_peer_device(connection, pi->vnr);
5460 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005461 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005462 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005463
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005464 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005465
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005466 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005467
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005468 if (get_ldev(device)) {
5469 drbd_rs_complete_io(device, sector);
5470 drbd_set_in_sync(device, sector, blksize);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005471 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005472 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5473 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005474 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005475 dec_rs_pending(device);
5476 atomic_add(blksize >> 9, &device->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005477
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005478 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005479}
5480
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005481static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005482validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005483 struct rb_root *root, const char *func,
5484 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005485{
5486 struct drbd_request *req;
5487 struct bio_and_error m;
5488
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005489 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005490 req = find_request(device, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005491 if (unlikely(!req)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005492 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005493 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005494 }
5495 __req_mod(req, what, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005496 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005497
5498 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005499 complete_master_bio(device, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005500 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005501}
5502
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005503static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005504{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005505 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005506 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005507 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005508 sector_t sector = be64_to_cpu(p->sector);
5509 int blksize = be32_to_cpu(p->blksize);
5510 enum drbd_req_event what;
5511
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005512 peer_device = conn_peer_device(connection, pi->vnr);
5513 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005514 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005515 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005516
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005517 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005518
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005519 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005520 drbd_set_in_sync(device, sector, blksize);
5521 dec_rs_pending(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005522 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005523 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005524 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005525 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005526 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005527 break;
5528 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005529 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005530 break;
5531 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005532 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005533 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005534 case P_SUPERSEDED:
5535 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005536 break;
5537 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005538 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005539 break;
5540 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005541 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005542 }
5543
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005544 return validate_req_change_req_state(device, p->block_id, sector,
5545 &device->write_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005546 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005547}
5548
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005549static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005550{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005551 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005552 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005553 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005554 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005555 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005556 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005557
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005558 peer_device = conn_peer_device(connection, pi->vnr);
5559 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005560 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005561 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005562
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005563 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005564
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005565 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005566 dec_rs_pending(device);
5567 drbd_rs_failed_io(device, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005568 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005569 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005570
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005571 err = validate_req_change_req_state(device, p->block_id, sector,
5572 &device->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005573 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005574 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005575 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5576 The master bio might already be completed, therefore the
5577 request is no longer in the collision hash. */
5578 /* In Protocol B we might already have got a P_RECV_ACK
5579 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005580 drbd_set_out_of_sync(device, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005581 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005582 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005583}
5584
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005585static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005586{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005587 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005588 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005589 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005590 sector_t sector = be64_to_cpu(p->sector);
5591
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005592 peer_device = conn_peer_device(connection, pi->vnr);
5593 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005594 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005595 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005596
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005597 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005598
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005599 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005600 (unsigned long long)sector, be32_to_cpu(p->blksize));
5601
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005602 return validate_req_change_req_state(device, p->block_id, sector,
5603 &device->read_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005604 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005605}
5606
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005607static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005608{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005609 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005610 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005611 sector_t sector;
5612 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005613 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005614
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005615 peer_device = conn_peer_device(connection, pi->vnr);
5616 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005617 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005618 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005619
5620 sector = be64_to_cpu(p->sector);
5621 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005622
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005623 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005624
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005625 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005626
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005627 if (get_ldev_if_state(device, D_FAILED)) {
5628 drbd_rs_complete_io(device, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005629 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005630 case P_NEG_RS_DREPLY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005631 drbd_rs_failed_io(device, sector, size);
Philipp Reisnerd612d302010-12-27 10:53:28 +01005632 case P_RS_CANCEL:
5633 break;
5634 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005635 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005636 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005637 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005638 }
5639
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005640 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005641}
5642
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005643static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005644{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005645 struct p_barrier_ack *p = pi->data;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005646 struct drbd_peer_device *peer_device;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005647 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005648
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005649 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005650
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005651 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005652 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5653 struct drbd_device *device = peer_device->device;
5654
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005655 if (device->state.conn == C_AHEAD &&
5656 atomic_read(&device->ap_in_flight) == 0 &&
5657 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5658 device->start_resync_timer.expires = jiffies + HZ;
5659 add_timer(&device->start_resync_timer);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005660 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005661 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005662 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005663
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005664 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005665}
5666
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005667static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005668{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005669 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005670 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005671 struct p_block_ack *p = pi->data;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005672 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005673 sector_t sector;
5674 int size;
5675
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005676 peer_device = conn_peer_device(connection, pi->vnr);
5677 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005678 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005679 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005680
Philipp Reisnerb411b362009-09-25 16:07:19 -07005681 sector = be64_to_cpu(p->sector);
5682 size = be32_to_cpu(p->blksize);
5683
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005684 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005685
5686 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005687 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005688 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005689 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005690
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005691 if (!get_ldev(device))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005692 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005693
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005694 drbd_rs_complete_io(device, sector);
5695 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005696
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005697 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005698
5699 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005700 if ((device->ov_left & 0x200) == 0x200)
5701 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005702
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005703 if (device->ov_left == 0) {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005704 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5705 if (dw) {
5706 dw->w.cb = w_ov_finished;
5707 dw->device = device;
5708 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005709 } else {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005710 drbd_err(device, "kmalloc(dw) failed.");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005711 ov_out_of_sync_print(device);
5712 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005713 }
5714 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005715 put_ldev(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005716 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005717}
5718
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005719static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005720{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005721 return 0;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005722}
5723
Philipp Reisner668700b2015-03-16 16:08:29 +01005724struct meta_sock_cmd {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005725 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005726 int (*fn)(struct drbd_connection *connection, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005727};
5728
Philipp Reisner668700b2015-03-16 16:08:29 +01005729static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5730{
5731 long t;
5732 struct net_conf *nc;
5733
5734 rcu_read_lock();
5735 nc = rcu_dereference(connection->net_conf);
5736 t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5737 rcu_read_unlock();
5738
5739 t *= HZ;
5740 if (ping_timeout)
5741 t /= 10;
5742
5743 connection->meta.socket->sk->sk_rcvtimeo = t;
5744}
5745
5746static void set_ping_timeout(struct drbd_connection *connection)
5747{
5748 set_rcvtimeo(connection, 1);
5749}
5750
5751static void set_idle_timeout(struct drbd_connection *connection)
5752{
5753 set_rcvtimeo(connection, 0);
5754}
5755
5756static struct meta_sock_cmd ack_receiver_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005757 [P_PING] = { 0, got_Ping },
5758 [P_PING_ACK] = { 0, got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005759 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5760 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5761 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005762 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005763 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5764 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005765 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005766 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5767 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5768 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5769 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02005770 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005771 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5772 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5773 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005774};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005775
Philipp Reisner1c03e522015-03-16 15:01:00 +01005776int drbd_ack_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005777{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005778 struct drbd_connection *connection = thi->connection;
Philipp Reisner668700b2015-03-16 16:08:29 +01005779 struct meta_sock_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005780 struct packet_info pi;
Philipp Reisner668700b2015-03-16 16:08:29 +01005781 unsigned long pre_recv_jif;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005782 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005783 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005784 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005785 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005786 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005787 bool ping_timeout_active = false;
Philipp Reisner3990e042013-03-27 14:08:48 +01005788 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005789
Philipp Reisner3990e042013-03-27 14:08:48 +01005790 rv = sched_setscheduler(current, SCHED_RR, &param);
5791 if (rv < 0)
Philipp Reisner668700b2015-03-16 16:08:29 +01005792 drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005793
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005794 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005795 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005796
Philipp Reisner668700b2015-03-16 16:08:29 +01005797 conn_reclaim_net_peer_reqs(connection);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005798
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005799 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5800 if (drbd_send_ping(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005801 drbd_err(connection, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005802 goto reconnect;
5803 }
Philipp Reisner668700b2015-03-16 16:08:29 +01005804 set_ping_timeout(connection);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005805 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005806 }
5807
Philipp Reisner668700b2015-03-16 16:08:29 +01005808 pre_recv_jif = jiffies;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005809 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005810
5811 /* Note:
5812 * -EINTR (on meta) we got a signal
5813 * -EAGAIN (on meta) rcvtimeo expired
5814 * -ECONNRESET other side closed the connection
5815 * -ERESTARTSYS (on data) we got a signal
5816 * rv < 0 other than above: unexpected error!
5817 * rv == expected: full header or command
5818 * rv < expected: "woken" by signal during receive
5819 * rv == 0 : "connection shut down by peer"
5820 */
5821 if (likely(rv > 0)) {
5822 received += rv;
5823 buf += rv;
5824 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005825 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005826 long t;
5827 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005828 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005829 rcu_read_unlock();
5830
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005831 t = wait_event_timeout(connection->ping_wait,
5832 connection->cstate < C_WF_REPORT_PARAMS,
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005833 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005834 if (t)
5835 break;
5836 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005837 drbd_err(connection, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005838 goto reconnect;
5839 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005840 /* If the data socket received something meanwhile,
5841 * that is good enough: peer is still alive. */
Philipp Reisner668700b2015-03-16 16:08:29 +01005842 if (time_after(connection->last_received, pre_recv_jif))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005843 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005844 if (ping_timeout_active) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005845 drbd_err(connection, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005846 goto reconnect;
5847 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005848 set_bit(SEND_PING, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005849 continue;
5850 } else if (rv == -EINTR) {
Philipp Reisner668700b2015-03-16 16:08:29 +01005851 /* maybe drbd_thread_stop(): the while condition will notice.
5852 * maybe woken for send_ping: we'll send a ping above,
5853 * and change the rcvtimeo */
5854 flush_signals(current);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005855 continue;
5856 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005857 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005858 goto reconnect;
5859 }
5860
5861 if (received == expect && cmd == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005862 if (decode_header(connection, connection->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005863 goto reconnect;
Philipp Reisner668700b2015-03-16 16:08:29 +01005864 cmd = &ack_receiver_tbl[pi.cmd];
5865 if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005866 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005867 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005868 goto disconnect;
5869 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005870 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005871 if (pi.size != expect - header_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005872 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005873 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005874 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005875 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005876 }
5877 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005878 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005879
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005880 err = cmd->fn(connection, &pi);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005881 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005882 drbd_err(connection, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005883 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005884 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005885
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005886 connection->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005887
Philipp Reisner668700b2015-03-16 16:08:29 +01005888 if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
5889 set_idle_timeout(connection);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005890 ping_timeout_active = false;
5891 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005892
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005893 buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005894 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005895 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005896 cmd = NULL;
5897 }
5898 }
5899
5900 if (0) {
5901reconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005902 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5903 conn_md_sync(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005904 }
5905 if (0) {
5906disconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005907 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005908 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005909
Philipp Reisner668700b2015-03-16 16:08:29 +01005910 drbd_info(connection, "ack_receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005911
5912 return 0;
5913}
Philipp Reisner668700b2015-03-16 16:08:29 +01005914
5915void drbd_send_acks_wf(struct work_struct *ws)
5916{
5917 struct drbd_peer_device *peer_device =
5918 container_of(ws, struct drbd_peer_device, send_acks_work);
5919 struct drbd_connection *connection = peer_device->connection;
5920 struct drbd_device *device = peer_device->device;
5921 struct net_conf *nc;
5922 int tcp_cork, err;
5923
5924 rcu_read_lock();
5925 nc = rcu_dereference(connection->net_conf);
5926 tcp_cork = nc->tcp_cork;
5927 rcu_read_unlock();
5928
5929 if (tcp_cork)
5930 drbd_tcp_cork(connection->meta.socket);
5931
5932 err = drbd_finish_peer_reqs(device);
5933 kref_put(&device->kref, drbd_destroy_device);
5934 /* get is in drbd_endio_write_sec_final(). That is necessary to keep the
5935 struct work_struct send_acks_work alive, which is in the peer_device object */
5936
5937 if (err) {
5938 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5939 return;
5940 }
5941
5942 if (tcp_cork)
5943 drbd_tcp_uncork(connection->meta.socket);
5944
5945 return;
5946}