/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_device *, int);
static int make_resync_request(struct drbd_device *, int);

/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */


/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync-after dependencies, we grab a write lock, because
   we need stable states on all devices for that.  */
rwlock_t global_state_lock;

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
        struct drbd_md_io *md_io;
        struct drbd_device *device;

        md_io = (struct drbd_md_io *)bio->bi_private;
        device = container_of(md_io, struct drbd_device, md_io);

        md_io->error = error;

        /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
         * to timeout on the lower level device, and eventually detach from it.
         * If this io completion runs after that timeout expired, this
         * drbd_md_put_buffer() may allow us to finally try and re-attach.
         * During normal operation, this only puts that extra reference
         * down to 1 again.
         * Make sure we first drop the reference, and only then signal
         * completion, or we may (in drbd_al_read_log()) cycle so fast into the
         * next drbd_md_sync_page_io(), that we trigger the
         * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
         */
        drbd_md_put_buffer(device);
        md_io->done = 1;
        wake_up(&device->misc_wait);
        bio_put(bio);
        if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
                put_ldev(device);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
        unsigned long flags = 0;
        struct drbd_peer_device *peer_device = peer_req->peer_device;
        struct drbd_device *device = peer_device->device;

        spin_lock_irqsave(&device->resource->req_lock, flags);
        device->read_cnt += peer_req->i.size >> 9;
        list_del(&peer_req->w.list);
        if (list_empty(&device->read_ee))
                wake_up(&device->ee_wait);
        if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
                __drbd_chk_io_error(device, DRBD_READ_ERROR);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);

        drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
        put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
        unsigned long flags = 0;
        struct drbd_peer_device *peer_device = peer_req->peer_device;
        struct drbd_device *device = peer_device->device;
        struct drbd_interval i;
        int do_wake;
        u64 block_id;
        int do_al_complete_io;

        /* after we moved peer_req to done_ee,
         * we may no longer access it,
         * it may be freed/reused already!
         * (as soon as we release the req_lock) */
        i = peer_req->i;
        do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
        block_id = peer_req->block_id;

        spin_lock_irqsave(&device->resource->req_lock, flags);
        device->writ_cnt += peer_req->i.size >> 9;
        list_move_tail(&peer_req->w.list, &device->done_ee);

        /*
         * Do not remove from the write_requests tree here: we did not send the
         * Ack yet and did not wake possibly waiting conflicting requests.
         * Removed from the tree from "drbd_process_done_ee" within the
         * appropriate dw.cb (e_end_block/e_end_resync_block) or from
         * _drbd_clear_done_ee.
         */

        do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

        /* FIXME do we want to detach for failed REQ_DISCARD?
         * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
        if (peer_req->flags & EE_WAS_ERROR)
                __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);

        if (block_id == ID_SYNCER)
                drbd_rs_complete_io(device, i.sector);

        if (do_wake)
                wake_up(&device->ee_wait);

        if (do_al_complete_io)
                drbd_al_complete_io(device, &i);

        wake_asender(peer_device->connection);
        put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
        struct drbd_peer_request *peer_req = bio->bi_private;
        struct drbd_device *device = peer_req->peer_device->device;
        int uptodate = bio_flagged(bio, BIO_UPTODATE);
        int is_write = bio_data_dir(bio) == WRITE;
        int is_discard = !!(bio->bi_rw & REQ_DISCARD);

        if (error && __ratelimit(&drbd_ratelimit_state))
                drbd_warn(device, "%s: error=%d s=%llus\n",
                        is_write ? (is_discard ? "discard" : "write")
                                 : "read", error,
                        (unsigned long long)peer_req->i.sector);
        if (!error && !uptodate) {
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
                                        is_write ? "write" : "read",
                                        (unsigned long long)peer_req->i.sector);
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
                error = -EIO;
        }

        if (error)
                set_bit(__EE_WAS_ERROR, &peer_req->flags);

        bio_put(bio); /* no need for the bio anymore */
        if (atomic_dec_and_test(&peer_req->pending_bios)) {
                if (is_write)
                        drbd_endio_write_sec_final(peer_req);
                else
                        drbd_endio_read_sec_final(peer_req);
        }
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio, int error)
{
        unsigned long flags;
        struct drbd_request *req = bio->bi_private;
        struct drbd_device *device = req->device;
        struct bio_and_error m;
        enum drbd_req_event what;
        int uptodate = bio_flagged(bio, BIO_UPTODATE);

        if (!error && !uptodate) {
                drbd_warn(device, "p %s: setting error to -EIO\n",
                         bio_data_dir(bio) == WRITE ? "write" : "read");
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
                error = -EIO;
        }


        /* If this request was aborted locally before,
         * but now was completed "successfully",
         * chances are that this caused arbitrary data corruption.
         *
         * "aborting" requests, or force-detaching the disk, is intended for
         * completely blocked/hung local backing devices which no longer
         * complete requests at all, not even do error completions.  In this
         * situation, usually a hard-reset and failover is the only way out.
         *
         * By "aborting", basically faking a local error-completion,
         * we allow for a more graceful switchover by cleanly migrating services.
         * Still the affected node has to be rebooted "soon".
         *
         * By completing these requests, we allow the upper layers to re-use
         * the associated data pages.
         *
         * If later the local backing device "recovers", and now DMAs some data
         * from disk into the original request pages, in the best case it will
         * just put random data into unused pages; but typically it will corrupt
         * meanwhile completely unrelated data, causing all sorts of damage.
         *
         * Which means delayed successful completion,
         * especially for READ requests,
         * is a reason to panic().
         *
         * We assume that a delayed *error* completion is OK,
         * though we still will complain noisily about it.
         */
        if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

                if (!error)
                        panic("possible random memory corruption caused by delayed completion of aborted local request\n");
        }

        /* to avoid recursion in __req_mod */
        if (unlikely(error)) {
                what = (bio_data_dir(bio) == WRITE)
                        ? WRITE_COMPLETED_WITH_ERROR
                        : (bio_rw(bio) == READ)
                          ? READ_COMPLETED_WITH_ERROR
                          : READ_AHEAD_COMPLETED_WITH_ERROR;
        } else
                what = COMPLETED_OK;

        bio_put(req->private_bio);
        req->private_bio = ERR_PTR(error);

        /* not req_mod(), we need irqsave here! */
        spin_lock_irqsave(&device->resource->req_lock, flags);
        __req_mod(req, what, &m);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);
        put_ldev(device);

        if (m.bio)
                complete_master_bio(device, &m);
}

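/* Hash all pages of a peer request into *digest using the given transform.
 * Callers allocate crypto_hash_digestsize(tfm) bytes for the digest; only
 * the last page of the page chain may be partially used. */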
void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
        struct hash_desc desc;
        struct scatterlist sg;
        struct page *page = peer_req->pages;
        struct page *tmp;
        unsigned len;

        desc.tfm = tfm;
        desc.flags = 0;

        sg_init_table(&sg, 1);
        crypto_hash_init(&desc);

        while ((tmp = page_chain_next(page))) {
                /* all but the last page will be fully used */
                sg_set_page(&sg, page, PAGE_SIZE, 0);
                crypto_hash_update(&desc, &sg, sg.length);
                page = tmp;
        }
        /* and now the last, possibly only partially used page */
        len = peer_req->i.size & (PAGE_SIZE - 1);
        sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
        crypto_hash_update(&desc, &sg, sg.length);
        crypto_hash_final(&desc, digest);
}

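/* Same as drbd_csum_ee(), but for a bio: hash every segment of the bio
 * into *digest using the given transform. */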
void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
{
        struct hash_desc desc;
        struct scatterlist sg;
        struct bio_vec bvec;
        struct bvec_iter iter;

        desc.tfm = tfm;
        desc.flags = 0;

        sg_init_table(&sg, 1);
        crypto_hash_init(&desc);

        bio_for_each_segment(bvec, bio, iter) {
                sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
                crypto_hash_update(&desc, &sg, sg.length);
        }
        crypto_hash_final(&desc, digest);
}

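/* Worker callback for checksum-based resync: compute the digest of the
 * locally read block and send it to the peer as P_CSUM_RS_REQUEST.  The
 * peer request and its pages are freed before sending, see below. */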
/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
        struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
        struct drbd_peer_device *peer_device = peer_req->peer_device;
        struct drbd_device *device = peer_device->device;
        int digest_size;
        void *digest;
        int err = 0;

        if (unlikely(cancel))
                goto out;

        if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
                goto out;

        digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
        digest = kmalloc(digest_size, GFP_NOIO);
        if (digest) {
                sector_t sector = peer_req->i.sector;
                unsigned int size = peer_req->i.size;
                drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
                /* Free peer_req and pages before send.
                 * In case we block on congestion, we could otherwise run into
                 * some distributed deadlock, if the other side blocks on
                 * congestion as well, because our receiver blocks in
                 * drbd_alloc_pages due to pp_in_use > max_buffers. */
                drbd_free_peer_req(device, peer_req);
                peer_req = NULL;
                inc_rs_pending(device);
                err = drbd_send_drequest_csum(peer_device, sector, size,
                                              digest, digest_size,
                                              P_CSUM_RS_REQUEST);
                kfree(digest);
        } else {
                drbd_err(device, "kmalloc() of digest failed.\n");
                err = -ENOMEM;
        }

out:
        if (peer_req)
                drbd_free_peer_req(device, peer_req);

        if (unlikely(err))
                drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
        return err;
}

#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)

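/* Read a block from the local disk into a freshly allocated peer request
 * and queue w_e_send_csum for it.  Returns 0 on success, -EIO if there is
 * no local disk, and -EAGAIN if throttled, out of memory, or the submit
 * failed (the caller rolls back and retries later). */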
static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
        struct drbd_device *device = peer_device->device;
        struct drbd_peer_request *peer_req;

        if (!get_ldev(device))
                return -EIO;

        if (drbd_rs_should_slow_down(device, sector))
                goto defer;

        /* GFP_TRY, because if there is no memory available right now, this may
         * be rescheduled for later. It is "only" background resync, after all. */
        peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
                                       size, true /* has real payload */, GFP_TRY);
        if (!peer_req)
                goto defer;

        peer_req->w.cb = w_e_send_csum;
        spin_lock_irq(&device->resource->req_lock);
        list_add(&peer_req->w.list, &device->read_ee);
        spin_unlock_irq(&device->resource->req_lock);

        atomic_add(size >> 9, &device->rs_sect_ev);
        if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
                return 0;

        /* If it failed because of ENOMEM, retry should help.  If it failed
         * because bio_add_page failed (probably broken lower level driver),
         * retry may or may not help.
         * If it does not, you may need to force disconnect. */
        spin_lock_irq(&device->resource->req_lock);
        list_del(&peer_req->w.list);
        spin_unlock_irq(&device->resource->req_lock);

        drbd_free_peer_req(device, peer_req);
defer:
        put_ldev(device);
        return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
        struct drbd_device *device =
                container_of(w, struct drbd_device, resync_work);

        switch (device->state.conn) {
        case C_VERIFY_S:
                make_ov_request(device, cancel);
                break;
        case C_SYNC_TARGET:
                make_resync_request(device, cancel);
                break;
        }

        return 0;
}

void resync_timer_fn(unsigned long data)
{
        struct drbd_device *device = (struct drbd_device *) data;

        if (list_empty(&device->resync_work.list))
                drbd_queue_work(&first_peer_device(device)->connection->sender_work,
                                &device->resync_work);
}

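/* Small helpers for the fixed-size planning FIFO (struct fifo_buffer) used
 * by the dynamic resync controller below: fifo_set() overwrites all slots,
 * fifo_push() returns the oldest slot while storing a new value in its
 * place, and fifo_add_val() adds a value to every slot. */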
static void fifo_set(struct fifo_buffer *fb, int value)
{
        int i;

        for (i = 0; i < fb->size; i++)
                fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
        int ov;

        ov = fb->values[fb->head_index];
        fb->values[fb->head_index++] = value;

        if (fb->head_index >= fb->size)
                fb->head_index = 0;

        return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
        int i;

        for (i = 0; i < fb->size; i++)
                fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
        struct fifo_buffer *fb;

        fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
        if (!fb)
                return NULL;

        fb->head_index = 0;
        fb->size = fifo_size;
        fb->total = 0;

        return fb;
}

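/* Dynamic resync speed controller: decide how many sectors to request from
 * the peer during the next SLEEP_TIME interval.  It tries to keep either a
 * fixed amount of resync data in flight (c_fill_target) or, roughly, a fixed
 * delay's worth of data (derived from sect_in and c_delay_target), spreading
 * the needed correction over plan->size steps and clamping the result to the
 * per-interval equivalent of c_max_rate.  sect_in is the number of sectors
 * that completed since the last invocation. */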
static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
        struct disk_conf *dc;
        unsigned int want;     /* The number of sectors we want in the proxy */
        int req_sect; /* Number of sectors to request in this turn */
        int correction; /* Number of sectors more we need in the proxy*/
        int cps; /* correction per invocation of drbd_rs_controller() */
        int steps; /* Number of time steps to plan ahead */
        int curr_corr;
        int max_sect;
        struct fifo_buffer *plan;

        dc = rcu_dereference(device->ldev->disk_conf);
        plan = rcu_dereference(device->rs_plan_s);

        steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

        if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
                want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
        } else { /* normal path */
                want = dc->c_fill_target ? dc->c_fill_target :
                        sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
        }

        correction = want - device->rs_in_flight - plan->total;

        /* Plan ahead */
        cps = correction / steps;
        fifo_add_val(plan, cps);
        plan->total += cps * steps;

        /* What we do in this step */
        curr_corr = fifo_push(plan, 0);
        plan->total -= curr_corr;

        req_sect = sect_in + curr_corr;
        if (req_sect < 0)
                req_sect = 0;

        max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
        if (req_sect > max_sect)
                req_sect = max_sect;

        /*
        drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
                 sect_in, device->rs_in_flight, want, correction,
                 steps, cps, device->rs_planed, curr_corr, req_sect);
        */

        return req_sect;
}

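/* How many resync requests (in units of BM_BLOCK_SIZE) may be issued in the
 * next SLEEP_TIME interval: taken from the controller above if a plan is
 * configured, otherwise derived from the static resync_rate.  In both cases
 * the result is capped at max-buffers/2 minus what is already in flight. */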
static int drbd_rs_number_requests(struct drbd_device *device)
{
        unsigned int sect_in;  /* Number of sectors that came in since the last turn */
        int number, mxb;

        sect_in = atomic_xchg(&device->rs_sect_in, 0);
        device->rs_in_flight -= sect_in;

        rcu_read_lock();
        mxb = drbd_get_max_buffers(device) / 2;
        if (rcu_dereference(device->rs_plan_s)->size) {
                number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
                device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
        } else {
                device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
                number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
        }
        rcu_read_unlock();

        /* Don't have more than "max-buffers"/2 in-flight.
         * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
         * potentially causing a distributed deadlock on congestion during
         * online-verify or (checksum-based) resync, if max-buffers,
         * socket buffer sizes and resync rate settings are mis-configured. */
        if (mxb - device->rs_in_flight < number)
                number = mxb - device->rs_in_flight;

        return number;
}

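/* Called from w_resync_timer() while we are SyncTarget: scan the out-of-sync
 * bitmap starting at bm_resync_fo, merge adjacent dirty bits into requests of
 * up to max_bio_size, and ask the peer either for checksums (via
 * read_for_csum()) or for the data itself (P_RS_DATA_REQUEST).  The requeue
 * path re-arms the resync timer for the next SLEEP_TIME interval. */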
static int make_resync_request(struct drbd_device *device, int cancel)
{
        unsigned long bit;
        sector_t sector;
        const sector_t capacity = drbd_get_capacity(device->this_bdev);
        int max_bio_size;
        int number, rollback_i, size;
        int align, queued, sndbuf;
        int i = 0;

        if (unlikely(cancel))
                return 0;

        if (device->rs_total == 0) {
                /* empty resync? */
                drbd_resync_finished(device);
                return 0;
        }

        if (!get_ldev(device)) {
                /* Since we only need to access device->rsync a
                   get_ldev_if_state(device,D_FAILED) would be sufficient, but
                   to continue resync with a broken disk makes no sense at
                   all */
                drbd_err(device, "Disk broke down during resync!\n");
                return 0;
        }

        max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
        number = drbd_rs_number_requests(device);
        if (number <= 0)
                goto requeue;

        for (i = 0; i < number; i++) {
                /* Stop generating RS requests, when half of the send buffer is filled */
                mutex_lock(&first_peer_device(device)->connection->data.mutex);
                if (first_peer_device(device)->connection->data.socket) {
                        queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued;
                        sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf;
                } else {
                        queued = 1;
                        sndbuf = 0;
                }
                mutex_unlock(&first_peer_device(device)->connection->data.mutex);
                if (queued > sndbuf / 2)
                        goto requeue;

next_sector:
                size = BM_BLOCK_SIZE;
                bit  = drbd_bm_find_next(device, device->bm_resync_fo);

                if (bit == DRBD_END_OF_BITMAP) {
                        device->bm_resync_fo = drbd_bm_bits(device);
                        put_ldev(device);
                        return 0;
                }

                sector = BM_BIT_TO_SECT(bit);

                if (drbd_rs_should_slow_down(device, sector) ||
                    drbd_try_rs_begin_io(device, sector)) {
                        device->bm_resync_fo = bit;
                        goto requeue;
                }
                device->bm_resync_fo = bit + 1;

                if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
                        drbd_rs_complete_io(device, sector);
                        goto next_sector;
                }

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
                /* try to find some adjacent bits.
                 * we stop if we have already the maximum req size.
                 *
                 * Additionally always align bigger requests, in order to
                 * be prepared for all stripe sizes of software RAIDs.
                 */
                align = 1;
                rollback_i = i;
                while (i < number) {
                        if (size + BM_BLOCK_SIZE > max_bio_size)
                                break;

                        /* Be always aligned */
                        if (sector & ((1<<(align+3))-1))
                                break;

                        /* do not cross extent boundaries */
                        if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
                                break;
                        /* now, is it actually dirty, after all?
                         * caution, drbd_bm_test_bit is tri-state for some
                         * obscure reason; ( b == 0 ) would get the out-of-band
                         * only accidentally right because of the "oddly sized"
                         * adjustment below */
                        if (drbd_bm_test_bit(device, bit+1) != 1)
                                break;
                        bit++;
                        size += BM_BLOCK_SIZE;
                        if ((BM_BLOCK_SIZE << align) <= size)
                                align++;
                        i++;
                }
                /* if we merged some,
                 * reset the offset to start the next drbd_bm_find_next from */
                if (size > BM_BLOCK_SIZE)
                        device->bm_resync_fo = bit + 1;
#endif

                /* adjust very last sectors, in case we are oddly sized */
                if (sector + (size>>9) > capacity)
                        size = (capacity-sector)<<9;
                if (first_peer_device(device)->connection->agreed_pro_version >= 89 &&
                    first_peer_device(device)->connection->csums_tfm) {
                        switch (read_for_csum(first_peer_device(device), sector, size)) {
                        case -EIO: /* Disk failure */
                                put_ldev(device);
                                return -EIO;
                        case -EAGAIN: /* allocation failed, or ldev busy */
                                drbd_rs_complete_io(device, sector);
                                device->bm_resync_fo = BM_SECT_TO_BIT(sector);
                                i = rollback_i;
                                goto requeue;
                        case 0:
                                /* everything ok */
                                break;
                        default:
                                BUG();
                        }
                } else {
                        int err;

                        inc_rs_pending(device);
                        err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST,
                                                 sector, size, ID_SYNCER);
                        if (err) {
                                drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
                                dec_rs_pending(device);
                                put_ldev(device);
                                return err;
                        }
                }
        }

        if (device->bm_resync_fo >= drbd_bm_bits(device)) {
                /* last syncer _request_ was sent,
                 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
                 * next sync group will resume), as soon as we receive the last
                 * resync data block, and the last bit is cleared.
                 * until then resync "work" is "inactive" ...
                 */
                put_ldev(device);
                return 0;
        }

 requeue:
        device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
        mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
        put_ldev(device);
        return 0;
}

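/* Online-verify counterpart of make_resync_request(): issue up to 'number'
 * verify requests via drbd_send_ov_request(), starting at device->ov_position,
 * honouring the throttling helpers and the optional verify stop sector. */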
static int make_ov_request(struct drbd_device *device, int cancel)
{
        int number, i, size;
        sector_t sector;
        const sector_t capacity = drbd_get_capacity(device->this_bdev);
        bool stop_sector_reached = false;

        if (unlikely(cancel))
                return 1;

        number = drbd_rs_number_requests(device);

        sector = device->ov_position;
        for (i = 0; i < number; i++) {
                if (sector >= capacity)
                        return 1;

                /* We check for "finished" only in the reply path:
                 * w_e_end_ov_reply().
                 * We need to send at least one request out. */
                stop_sector_reached = i > 0
                        && verify_can_do_stop_sector(device)
                        && sector >= device->ov_stop_sector;
                if (stop_sector_reached)
                        break;

                size = BM_BLOCK_SIZE;

                if (drbd_rs_should_slow_down(device, sector) ||
                    drbd_try_rs_begin_io(device, sector)) {
                        device->ov_position = sector;
                        goto requeue;
                }

                if (sector + (size>>9) > capacity)
                        size = (capacity-sector)<<9;

                inc_rs_pending(device);
                if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
                        dec_rs_pending(device);
                        return 0;
                }
                sector += BM_SECT_PER_BIT;
        }
        device->ov_position = sector;

 requeue:
        device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
        if (i == 0 || !stop_sector_reached)
                mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
        return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
        struct drbd_device_work *dw =
                container_of(w, struct drbd_device_work, w);
        struct drbd_device *device = dw->device;
        kfree(dw);
        ov_out_of_sync_print(device);
        drbd_resync_finished(device);

        return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
        struct drbd_device_work *dw =
                container_of(w, struct drbd_device_work, w);
        struct drbd_device *device = dw->device;
        kfree(dw);

        drbd_resync_finished(device);

        return 0;
}

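/* Send a ping to the peer and wait until the ack arrives (or the connection
 * is lost) before drbd_resync_finished() samples the final state below. */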
static void ping_peer(struct drbd_device *device)
{
        struct drbd_connection *connection = first_peer_device(device)->connection;

        clear_bit(GOT_PING_ACK, &connection->flags);
        request_ping(connection);
        wait_event(connection->ping_wait,
                   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

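/* Finish a resync or online-verify run: flush the resync LRU (retrying via
 * w_resync_finished if that is not yet possible), log throughput statistics,
 * update disk/peer-disk states and generation UUIDs, and possibly trigger the
 * "out-of-sync" or "after-resync-target" user space helpers. */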
int drbd_resync_finished(struct drbd_device *device)
{
        unsigned long db, dt, dbdt;
        unsigned long n_oos;
        union drbd_state os, ns;
        struct drbd_device_work *dw;
        char *khelper_cmd = NULL;
        int verify_done = 0;

        /* Remove all elements from the resync LRU. Since future actions
         * might set bits in the (main) bitmap, then the entries in the
         * resync LRU would be wrong. */
        if (drbd_rs_del_all(device)) {
                /* In case this is not possible now, most probably because
                 * there are P_RS_DATA_REPLY Packets lingering on the worker's
                 * queue (or even the read operations for those packets
                 * is not finished by now).  Retry in 100ms. */

                schedule_timeout_interruptible(HZ / 10);
                dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
                if (dw) {
                        dw->w.cb = w_resync_finished;
                        dw->device = device;
                        drbd_queue_work(&first_peer_device(device)->connection->sender_work,
                                        &dw->w);
                        return 1;
                }
                drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
        }

        dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
        if (dt <= 0)
                dt = 1;

        db = device->rs_total;
        /* adjust for verify start and stop sectors, respective reached position */
        if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
                db -= device->ov_left;

        dbdt = Bit2KB(db/dt);
        device->rs_paused /= HZ;

        if (!get_ldev(device))
                goto out;

        ping_peer(device);

        spin_lock_irq(&device->resource->req_lock);
        os = drbd_read_state(device);

        verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

        /* This protects us against multiple calls (that can happen in the presence
           of application IO), and against connectivity loss just before we arrive here. */
        if (os.conn <= C_CONNECTED)
                goto out_unlock;

        ns = os;
        ns.conn = C_CONNECTED;

        drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
             verify_done ? "Online verify" : "Resync",
             dt + device->rs_paused, device->rs_paused, dbdt);

        n_oos = drbd_bm_total_weight(device);

        if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
                if (n_oos) {
                        drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
                              n_oos, Bit2KB(1));
                        khelper_cmd = "out-of-sync";
                }
        } else {
                D_ASSERT(device, (n_oos - device->rs_failed) == 0);

                if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
                        khelper_cmd = "after-resync-target";

                if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
                        const unsigned long s = device->rs_same_csum;
                        const unsigned long t = device->rs_total;
                        const int ratio =
                                (t == 0)     ? 0 :
                                (t < 100000) ? ((s*100)/t) : (s/(t/100));
                        drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
                             "transferred %luK total %luK\n",
                             ratio,
                             Bit2KB(device->rs_same_csum),
                             Bit2KB(device->rs_total - device->rs_same_csum),
                             Bit2KB(device->rs_total));
                }
        }

        if (device->rs_failed) {
                drbd_info(device, "            %lu failed blocks\n", device->rs_failed);

                if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
                        ns.disk = D_INCONSISTENT;
                        ns.pdsk = D_UP_TO_DATE;
                } else {
                        ns.disk = D_UP_TO_DATE;
                        ns.pdsk = D_INCONSISTENT;
                }
        } else {
                ns.disk = D_UP_TO_DATE;
                ns.pdsk = D_UP_TO_DATE;

                if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
                        if (device->p_uuid) {
                                int i;
                                for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
                                        _drbd_uuid_set(device, i, device->p_uuid[i]);
                                drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
                                _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
                        } else {
                                drbd_err(device, "device->p_uuid is NULL! BUG\n");
                        }
                }

                if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
                        /* for verify runs, we don't update uuids here,
                         * so there would be nothing to report. */
                        drbd_uuid_set_bm(device, 0UL);
                        drbd_print_uuids(device, "updated UUIDs");
                        if (device->p_uuid) {
                                /* Now the two UUID sets are equal, update what we
                                 * know of the peer. */
                                int i;
                                for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
                                        device->p_uuid[i] = device->ldev->md.uuid[i];
                        }
                }
        }

        _drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
        spin_unlock_irq(&device->resource->req_lock);
        put_ldev(device);
out:
        device->rs_total  = 0;
        device->rs_failed = 0;
        device->rs_paused = 0;

        /* reset start sector, if we reached end of device */
        if (verify_done && device->ov_left == 0)
                device->ov_start_sector = 0;

        drbd_md_sync(device);

        if (khelper_cmd)
                drbd_khelper(device, khelper_cmd);

        return 1;
}

/* helper */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
        if (drbd_peer_req_has_active_page(peer_req)) {
                /* This might happen if sendpage() has not finished */
                int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
                atomic_add(i, &device->pp_in_use_by_net);
                atomic_sub(i, &device->pp_in_use);
                spin_lock_irq(&device->resource->req_lock);
                list_add_tail(&peer_req->w.list, &device->net_ee);
                spin_unlock_irq(&device->resource->req_lock);
                wake_up(&drbd_pp_wait);
        } else
                drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @device:     DRBD device.
 * @w:          work object.
 * @cancel:     The connection will be closed anyways
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
        struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
        struct drbd_peer_device *peer_device = peer_req->peer_device;
        struct drbd_device *device = peer_device->device;
        int err;

        if (unlikely(cancel)) {
                drbd_free_peer_req(device, peer_req);
                dec_unacked(device);
                return 0;
        }

        if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
                err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
        } else {
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_err(device, "Sending NegDReply. sector=%llus.\n",
                            (unsigned long long)peer_req->i.sector);

                err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
        }

        dec_unacked(device);

        move_to_net_ee_or_free(device, peer_req);

        if (unlikely(err))
                drbd_err(device, "drbd_send_block() failed\n");
        return err;
}

/**
 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
 * @w:          work object.
 * @cancel:     The connection will be closed anyways
 */
int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
{
        struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
        struct drbd_peer_device *peer_device = peer_req->peer_device;
        struct drbd_device *device = peer_device->device;
        int err;

        if (unlikely(cancel)) {
                drbd_free_peer_req(device, peer_req);
                dec_unacked(device);
                return 0;
        }

        if (get_ldev_if_state(device, D_FAILED)) {
                drbd_rs_complete_io(device, peer_req->i.sector);
                put_ldev(device);
        }

        if (device->state.conn == C_AHEAD) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001064 err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001065 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001066 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1067 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001068 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001069 } else {
1070 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001071 drbd_err(device, "Not sending RSDataReply, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001072 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001073 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001074 }
1075 } else {
1076 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001077 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001078 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001079
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001080 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001081
1082 /* update resync data with failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001083 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001084 }
1085
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001086 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001087
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001088 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001089
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001090 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001091 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001092 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001093}
1094
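/* Worker callback for checksum-based resync requests: compare the digest
 * the peer sent along with one computed over the locally read block; if
 * they match, acknowledge with P_RS_IS_IN_SYNC, otherwise reply with the
 * full block as P_RS_DATA_REPLY. */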
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001095int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001096{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001097 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001098 struct drbd_peer_device *peer_device = peer_req->peer_device;
1099 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001100 struct digest_info *di;
1101 int digest_size;
1102 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001103 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001104
1105 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001106 drbd_free_peer_req(device, peer_req);
1107 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001108 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001109 }
1110
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001111 if (get_ldev(device)) {
1112 drbd_rs_complete_io(device, peer_req->i.sector);
1113 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001114 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001115
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001116 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001117
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001118 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001119 /* quick hack to try to avoid a race against reconfiguration.
1120 * a real fix would be much more involved,
1121 * introducing more locking mechanisms */
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001122 if (peer_device->connection->csums_tfm) {
1123 digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001124 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001125 digest = kmalloc(digest_size, GFP_NOIO);
1126 }
1127 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001128 drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001129 eq = !memcmp(digest, di->digest, digest_size);
1130 kfree(digest);
1131 }
1132
1133 if (eq) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001134 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001135 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001136 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001137 err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001138 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001139 inc_rs_pending(device);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001140 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1141 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001142 kfree(di);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001143 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144 }
1145 } else {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001146 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001147 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001148 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001149 }
1150
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001151 dec_unacked(device);
1152 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001153
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001154 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001155 drbd_err(device, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001156 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001157}
1158
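/* Worker callback for an online-verify request: compute the digest of the
 * locally read block and send it to the peer in a P_OV_REPLY packet; an
 * all-zero digest is sent if the local read failed. */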
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001159int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001160{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001161 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001162 struct drbd_peer_device *peer_device = peer_req->peer_device;
1163 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001164 sector_t sector = peer_req->i.sector;
1165 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001166 int digest_size;
1167 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001168 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001169
1170 if (unlikely(cancel))
1171 goto out;
1172
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001173 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001174 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001175 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001176 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001177 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001178 }
1179
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001180 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001181 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001182 else
1183 memset(digest, 0, digest_size);
1184
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001185 /* Free peer_req and pages before send.
1186 * In case we block on congestion, we could otherwise run into
1187 * some distributed deadlock, if the other side blocks on
1188 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001189 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001190 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001191 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001192 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001193 err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001194 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001195 dec_rs_pending(device);
Philipp Reisner8f214202011-03-01 15:52:35 +01001196 kfree(digest);
1197
Philipp Reisnerb411b362009-09-25 16:07:19 -07001198out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001199 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001200 drbd_free_peer_req(device, peer_req);
1201 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001202 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001203}
1204
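/* Record a block found out of sync during online verify: extend the current
 * out-of-sync range if contiguous, otherwise start a new one, and mark the
 * sectors out of sync in the bitmap. */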
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001205void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001206{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001207 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1208 device->ov_last_oos_size += size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001209 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001210 device->ov_last_oos_start = sector;
1211 device->ov_last_oos_size = size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001212 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001213 drbd_set_out_of_sync(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001214}
1215
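/* Worker callback for an online-verify reply: recompute the digest over the
 * local block, compare it with the digest received from the peer, report the
 * result via P_OV_RESULT, and finish the verify run once all blocks are done
 * or the stop sector has been reached. */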
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001216int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001217{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001218 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001219 struct drbd_peer_device *peer_device = peer_req->peer_device;
1220 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001221 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001223 sector_t sector = peer_req->i.sector;
1224 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001225 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001226 int err, eq = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001227 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228
1229 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001230 drbd_free_peer_req(device, peer_req);
1231 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001232 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001233 }
1234
1235 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1236 * the resync lru has been cleaned up already */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001237 if (get_ldev(device)) {
1238 drbd_rs_complete_io(device, peer_req->i.sector);
1239 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001240 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001241
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001242 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001243
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001244 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001245 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001246 digest = kmalloc(digest_size, GFP_NOIO);
1247 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001248 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001249
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001250 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001251 eq = !memcmp(digest, di->digest, digest_size);
1252 kfree(digest);
1253 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001254 }
1255
Lars Ellenberg9676c762011-02-22 14:02:31 +01001256 /* Free peer_req and pages before send.
1257 * In case we block on congestion, we could otherwise run into
1258 * some distributed deadlock, if the other side blocks on
1259 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001260 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001261 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262 if (!eq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001263 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001264 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001265 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001266
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001267 err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001268 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001269
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001270 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001272 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001273
1274 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001275 if ((device->ov_left & 0x200) == 0x200)
1276 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001277
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001278 stop_sector_reached = verify_can_do_stop_sector(device) &&
1279 (sector + (size>>9)) >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001280
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001281 if (device->ov_left == 0 || stop_sector_reached) {
1282 ov_out_of_sync_print(device);
1283 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001284 }
1285
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001286 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001287}
1288
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001289/* FIXME
1290 * We need to track the number of pending barrier acks,
1291 * and to be able to wait for them.
1292 * See also comment in drbd_adm_attach before drbd_suspend_io.
1293 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001294static int drbd_send_barrier(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001296 struct p_barrier *p;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001297 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001298
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001299 sock = &connection->data;
1300 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001301 if (!p)
1302 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001303 p->barrier = connection->send.current_epoch_nr;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001304 p->pad = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001305 connection->send.current_epoch_writes = 0;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001306
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001307 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001308}
1309
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001310int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001311{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001312 struct drbd_device *device =
1313 container_of(w, struct drbd_device, unplug_work);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001314 struct drbd_socket *sock;
1315
Philipp Reisnerb411b362009-09-25 16:07:19 -07001316 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001317 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001318 sock = &first_peer_device(device)->connection->data;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001319 if (!drbd_prepare_command(first_peer_device(device), sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001320 return -EIO;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001321 return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001322}
1323
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001324static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001325{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001326 if (!connection->send.seen_any_write_yet) {
1327 connection->send.seen_any_write_yet = true;
1328 connection->send.current_epoch_nr = epoch;
1329 connection->send.current_epoch_writes = 0;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001330 }
1331}
1332
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001333static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001334{
1335 /* re-init if first write on this connection */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001336 if (!connection->send.seen_any_write_yet)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001337 return;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001338 if (connection->send.current_epoch_nr != epoch) {
1339 if (connection->send.current_epoch_writes)
1340 drbd_send_barrier(connection);
1341 connection->send.current_epoch_nr = epoch;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001342 }
1343}
1344
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001345int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001346{
1347 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001348 struct drbd_device *device = req->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001349 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001350 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001351
1352 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001353 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001354 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001355 }
1356
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001357 /* this time, no connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001358 * If it was sent, it was the closing barrier for the last
1359 * replicated epoch, before we went into AHEAD mode.
1360 * No more barriers will be sent, until we leave AHEAD mode again. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001361 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001362
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001363 err = drbd_send_out_of_sync(first_peer_device(device), req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001364 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001365
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001366 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001367}
1368
Philipp Reisnerb411b362009-09-25 16:07:19 -07001369/**
1370 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001371 * @w: work object.
1372 * @cancel: The connection will be closed anyways
1373 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001374int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001375{
1376 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001377 struct drbd_device *device = req->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001378 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001379 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001380
1381 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001382 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001383 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001384 }
1385
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001386 re_init_if_first_write(connection, req->epoch);
1387 maybe_send_barrier(connection, req->epoch);
1388 connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001389
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001390 err = drbd_send_dblock(first_peer_device(device), req);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001391 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001392
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001393 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001394}
1395
1396/**
1397 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
Philipp Reisnerb411b362009-09-25 16:07:19 -07001398 * @w: work object.
1399 * @cancel: The connection will be closed anyways
1400 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001401int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001402{
1403 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001404 struct drbd_device *device = req->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001405 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001406 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001407
1408 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001409 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001410 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001411 }
1412
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001413 /* Even read requests may close a write epoch,
1414 * if there was any yet. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001415 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001416
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001417 err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001418 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001419
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001420 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001421
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001422 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001423}
1424
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001425int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001426{
1427 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001428 struct drbd_device *device = req->device;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001429
Philipp Reisner07782862010-08-31 12:00:50 +02001430 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001431 drbd_al_begin_io(device, &req->i, false);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001432
1433 drbd_req_make_private_bio(req, req->master_bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001434 req->private_bio->bi_bdev = device->ldev->backing_bdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001435 generic_make_request(req->private_bio);
1436
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001437 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001438}
1439
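/* Walk the resync-after dependency chain; return 1 if no device this one
 * depends on is currently resyncing or paused, 0 otherwise. */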
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001440static int _drbd_may_sync_now(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001441{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001442 struct drbd_device *odev = device;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001443 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001444
1445 while (1) {
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001446 if (!odev->ldev || odev->state.disk == D_DISKLESS)
Philipp Reisner438c8372011-03-28 14:48:01 +02001447 return 1;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001448 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001449 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001450 rcu_read_unlock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001451 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001452 return 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001453 odev = minor_to_device(resync_after);
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001454 if (!odev)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001455 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001456 if ((odev->state.conn >= C_SYNC_SOURCE &&
1457 odev->state.conn <= C_PAUSED_SYNC_T) ||
1458 odev->state.aftr_isp || odev->state.peer_isp ||
1459 odev->state.user_isp)
1460 return 0;
1461 }
1462}
1463
1464/**
1465 * _drbd_pause_after() - Pause resync on all devices that may not resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001466 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001467 *
1468 * Called from process context only (admin command and after_state_ch).
1469 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001470static int _drbd_pause_after(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001471{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001472 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473 int i, rv = 0;
1474
Philipp Reisner695d08f2011-04-11 22:53:32 -07001475 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001476 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001477 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1478 continue;
1479 if (!_drbd_may_sync_now(odev))
1480 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1481 != SS_NOTHING_TO_DO);
1482 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001483 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001484
1485 return rv;
1486}
1487
1488/**
1489 * _drbd_resume_next() - Resume resync on all devices that may resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001490 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001491 *
1492 * Called from process context only (admin command and worker).
1493 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001494static int _drbd_resume_next(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001496 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497 int i, rv = 0;
1498
Philipp Reisner695d08f2011-04-11 22:53:32 -07001499 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001500 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001501 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1502 continue;
1503 if (odev->state.aftr_isp) {
1504 if (_drbd_may_sync_now(odev))
1505 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1506 CS_HARD, NULL)
1507 != SS_NOTHING_TO_DO);
1508 }
1509 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001510 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511 return rv;
1512}
1513
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001514void resume_next_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001515{
1516 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001517 _drbd_resume_next(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001518 write_unlock_irq(&global_state_lock);
1519}
1520
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001521void suspend_other_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001522{
1523 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001524 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001525 write_unlock_irq(&global_state_lock);
1526}
1527
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001528/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001529enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001530{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001531 struct drbd_device *odev;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001532 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001533
1534 if (o_minor == -1)
1535 return NO_ERROR;
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001536 if (o_minor < -1 || o_minor > MINORMASK)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001537 return ERR_RESYNC_AFTER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001538
1539 /* check for loops */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001540 odev = minor_to_device(o_minor);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001541 while (1) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001542 if (odev == device)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001543 return ERR_RESYNC_AFTER_CYCLE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001544
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001545 /* You are free to depend on diskless, non-existing,
1546 * or not yet/no longer existing minors.
1547 * We only reject dependency loops.
1548 * We cannot follow the dependency chain beyond a detached or
1549 * missing minor.
1550 */
1551 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1552 return NO_ERROR;
1553
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001554 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001555 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001556 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557 /* dependency chain ends here, no cycles. */
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001558 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559 return NO_ERROR;
1560
1561 /* follow the dependency chain */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001562 odev = minor_to_device(resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001563 }
1564}
1565
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001566/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001567void drbd_resync_after_changed(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568{
1569 int changes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001570
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001571 do {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001572 changes = _drbd_pause_after(device);
1573 changes |= _drbd_resume_next(device);
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001574 } while (changes);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001575}
1576
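/* Reset the dynamic resync-rate controller: clear the sector counters, the
 * number of in-flight resync requests, and the resync plan fifo. */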
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001577void drbd_rs_controller_reset(struct drbd_device *device)
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001578{
Philipp Reisner813472c2011-05-03 16:47:02 +02001579 struct fifo_buffer *plan;
1580
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001581 atomic_set(&device->rs_sect_in, 0);
1582 atomic_set(&device->rs_sect_ev, 0);
1583 device->rs_in_flight = 0;
Philipp Reisner813472c2011-05-03 16:47:02 +02001584
1585 /* Updating the RCU protected object in place is necessary since
1586 this function gets called from atomic context.
1587 It is valid since all other updates also lead to a completely
1588 empty fifo */
1589 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001590 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner813472c2011-05-03 16:47:02 +02001591 plan->total = 0;
1592 fifo_set(plan, 0);
1593 rcu_read_unlock();
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001594}
1595
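/* Timer callback: hand the (re)start of a resync over to the worker by
 * queuing start_resync_work on the connection's sender queue. */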
Philipp Reisner1f04af32011-02-07 11:33:59 +01001596void start_resync_timer_fn(unsigned long data)
1597{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001598 struct drbd_device *device = (struct drbd_device *) data;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001599
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001600 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
1601 &device->start_resync_work);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001602}
1603
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001604int w_start_resync(struct drbd_work *w, int cancel)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001605{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001606 struct drbd_device *device =
1607 container_of(w, struct drbd_device, start_resync_work);
Philipp Reisner00d56942011-02-09 18:09:48 +01001608
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001609 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001610 drbd_warn(device, "w_start_resync later...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001611 device->start_resync_timer.expires = jiffies + HZ/10;
1612 add_timer(&device->start_resync_timer);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001613 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001614 }
1615
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001616 drbd_start_resync(device, C_SYNC_SOURCE);
1617 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001618 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001619}
1620
Philipp Reisnerb411b362009-09-25 16:07:19 -07001621/**
1622 * drbd_start_resync() - Start the resync process
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001623 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001624 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1625 *
1626 * This function might bring you directly into one of the
1627 * C_PAUSED_SYNC_* states.
1628 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001629void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630{
1631 union drbd_state ns;
1632 int r;
1633
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001634 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001635 drbd_err(device, "Resync already running!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636 return;
1637 }
1638
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001639 if (!test_bit(B_RS_H_DONE, &device->flags)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001640 if (side == C_SYNC_TARGET) {
1641 /* Since application IO was locked out during C_WF_BITMAP_T and
1642 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1643 we check that we might make the data inconsistent. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001644 r = drbd_khelper(device, "before-resync-target");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001645 r = (r >> 8) & 0xff;
1646 if (r > 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001647 drbd_info(device, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001648 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001649 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001650 return;
1651 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001652 } else /* C_SYNC_SOURCE */ {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001653 r = drbd_khelper(device, "before-resync-source");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001654 r = (r >> 8) & 0xff;
1655 if (r > 0) {
1656 if (r == 3) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001657 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001658 "ignoring. Old userland tools?\n", r);
1659 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001660 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001661 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001662 conn_request_state(first_peer_device(device)->connection,
1663 NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001664 return;
1665 }
1666 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001667 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001668 }
1669
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001670 if (current == first_peer_device(device)->connection->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001671 /* The worker should not sleep waiting for state_mutex,
Philipp Reisnere64a3292011-02-05 17:34:11 +01001672 that can take a long time */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001673 if (!mutex_trylock(device->state_mutex)) {
1674 set_bit(B_RS_H_DONE, &device->flags);
1675 device->start_resync_timer.expires = jiffies + HZ/5;
1676 add_timer(&device->start_resync_timer);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001677 return;
1678 }
1679 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001680 mutex_lock(device->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001681 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001682 clear_bit(B_RS_H_DONE, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001683
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001684 write_lock_irq(&global_state_lock);
Philipp Reisnera7004712013-03-27 14:08:35 +01001685 /* Did some connection breakage or IO error race with us? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001686 if (device->state.conn < C_CONNECTED
1687 || !get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001688 write_unlock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001689 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690 return;
1691 }
1692
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001693 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001694
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001695 ns.aftr_isp = !_drbd_may_sync_now(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001696
1697 ns.conn = side;
1698
1699 if (side == C_SYNC_TARGET)
1700 ns.disk = D_INCONSISTENT;
1701 else /* side == C_SYNC_SOURCE */
1702 ns.pdsk = D_INCONSISTENT;
1703
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001704 r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1705 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706
1707 if (ns.conn < C_CONNECTED)
1708 r = SS_UNKNOWN_ERROR;
1709
1710 if (r == SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001711 unsigned long tw = drbd_bm_total_weight(device);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001712 unsigned long now = jiffies;
1713 int i;
1714
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001715 device->rs_failed = 0;
1716 device->rs_paused = 0;
1717 device->rs_same_csum = 0;
1718 device->rs_last_events = 0;
1719 device->rs_last_sect_ev = 0;
1720 device->rs_total = tw;
1721 device->rs_start = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001722 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001723 device->rs_mark_left[i] = tw;
1724 device->rs_mark_time[i] = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001725 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001726 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001727 }
1728 write_unlock_irq(&global_state_lock);
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001729
Philipp Reisnerb411b362009-09-25 16:07:19 -07001730 if (r == SS_SUCCESS) {
Philipp Reisner328e0f12012-10-19 14:37:47 +02001731 /* reset rs_last_bcast when a resync or verify is started,
1732 * to deal with potential jiffies wrap. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001733 device->rs_last_bcast = jiffies - HZ;
Philipp Reisner328e0f12012-10-19 14:37:47 +02001734
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001735 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001736 drbd_conn_str(ns.conn),
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001737 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1738 (unsigned long) device->rs_total);
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001739 if (side == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001740 device->bm_resync_fo = 0;
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001741
1742 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1743 * with w_send_oos, or the sync target will get confused as to
1744 * how many bits to resync. We cannot do that always, because for an
1745 * empty resync and protocol < 95, we need to do it here, as we call
1746 * drbd_resync_finished from here in that case.
1747 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1748 * and from after_state_ch otherwise. */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001749 if (side == C_SYNC_SOURCE &&
1750 first_peer_device(device)->connection->agreed_pro_version < 96)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001751 drbd_gen_and_send_sync_uuid(first_peer_device(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001752
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001753 if (first_peer_device(device)->connection->agreed_pro_version < 95 &&
1754 device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001755 /* This still has a race (about when exactly the peers
1756 * detect connection loss) that can lead to a full sync
1757 * on next handshake. In 8.3.9 we fixed this with explicit
1758 * resync-finished notifications, but the fix
1759 * introduces a protocol change. Sleeping for some
1760 * time longer than the ping interval + timeout on the
1761 * SyncSource, to give the SyncTarget the chance to
1762 * detect connection loss, then waiting for a ping
1763 * response (implicit in drbd_resync_finished) reduces
1764 * the race considerably, but does not solve it. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001765 if (side == C_SYNC_SOURCE) {
1766 struct net_conf *nc;
1767 int timeo;
1768
1769 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001770 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001771 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1772 rcu_read_unlock();
1773 schedule_timeout_interruptible(timeo);
1774 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001775 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001776 }
1777
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001778 drbd_rs_controller_reset(device);
1779 /* ns.conn may already be != device->state.conn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001780 * we may have been paused in between, or become paused until
1781 * the timer triggers.
1782 * No matter, that is handled in resync_timer_fn() */
1783 if (ns.conn == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001784 mod_timer(&device->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001785
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001786 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001787 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001788 put_ldev(device);
1789 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001790}
1791
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001792/* If the resource already closed the current epoch, but we did not
1793 * (because we have not yet seen new requests), we should send the
1794 * corresponding barrier now. Must be checked within the same spinlock
1795 * that is used to check for new requests. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001796static bool need_to_send_barrier(struct drbd_connection *connection)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001797{
1798 if (!connection->send.seen_any_write_yet)
1799 return false;
1800
1801 /* Skip barriers that do not contain any writes.
1802 * This may happen during AHEAD mode. */
1803 if (!connection->send.current_epoch_writes)
1804 return false;
1805
1806 /* ->req_lock is held when requests are queued on
1807 * connection->sender_work, and put into ->transfer_log.
1808 * It is also held when ->current_tle_nr is increased.
1809 * So either there are already new requests queued,
1810 * and corresponding barriers will be sent there.
1811 * Or nothing new is queued yet, so the difference will be 1.
1812 */
1813 if (atomic_read(&connection->current_tle_nr) !=
1814 connection->send.current_epoch_nr + 1)
1815 return false;
1816
1817 return true;
1818}
1819
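/* Splice everything queued on @queue onto @work_list; returns true if there
 * is any work to do. */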
Rashika Kheriaa186e472013-12-19 15:06:10 +05301820static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001821{
1822 spin_lock_irq(&queue->q_lock);
1823 list_splice_init(&queue->q, work_list);
1824 spin_unlock_irq(&queue->q_lock);
1825 return !list_empty(work_list);
1826}
1827
Rashika Kheriaa186e472013-12-19 15:06:10 +05301828static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001829{
1830 spin_lock_irq(&queue->q_lock);
1831 if (!list_empty(&queue->q))
1832 list_move(queue->q.next, work_list);
1833 spin_unlock_irq(&queue->q_lock);
1834 return !list_empty(work_list);
1835}
1836
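/* Wait until new work is queued on the connection's sender queue. While
 * idle, uncork the data socket and, if the resource has already closed the
 * current epoch, send the corresponding barrier; re-establish TCP corking
 * before returning. */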
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001837static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001838{
1839 DEFINE_WAIT(wait);
1840 struct net_conf *nc;
1841 int uncork, cork;
1842
1843 dequeue_work_item(&connection->sender_work, work_list);
1844 if (!list_empty(work_list))
1845 return;
1846
1847 /* Still nothing to do?
1848 * Maybe we still need to close the current epoch,
1849 * even if no new requests are queued yet.
1850 *
1851 * Also, poke TCP, just in case.
1852 * Then wait for new work (or signal). */
1853 rcu_read_lock();
1854 nc = rcu_dereference(connection->net_conf);
1855 uncork = nc ? nc->tcp_cork : 0;
1856 rcu_read_unlock();
1857 if (uncork) {
1858 mutex_lock(&connection->data.mutex);
1859 if (connection->data.socket)
1860 drbd_tcp_uncork(connection->data.socket);
1861 mutex_unlock(&connection->data.mutex);
1862 }
1863
1864 for (;;) {
1865 int send_barrier;
1866 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001867 spin_lock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001868 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
Lars Ellenbergbc317a92012-08-22 11:47:14 +02001869 /* dequeue single item only,
1870 * we still use drbd_queue_work_front() in some places */
1871 if (!list_empty(&connection->sender_work.q))
1872 list_move(connection->sender_work.q.next, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001873 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1874 if (!list_empty(work_list) || signal_pending(current)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001875 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001876 break;
1877 }
1878 send_barrier = need_to_send_barrier(connection);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001879 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001880 if (send_barrier) {
1881 drbd_send_barrier(connection);
1882 connection->send.current_epoch_nr++;
1883 }
1884 schedule();
1885 /* may be woken up for things other than new work, too,
1886 * e.g. if the current epoch got closed.
1887 * In which case we send the barrier above. */
1888 }
1889 finish_wait(&connection->sender_work.q_wait, &wait);
1890
1891 /* someone may have changed the config while we have been waiting above. */
1892 rcu_read_lock();
1893 nc = rcu_dereference(connection->net_conf);
1894 cork = nc ? nc->tcp_cork : 0;
1895 rcu_read_unlock();
1896 mutex_lock(&connection->data.mutex);
1897 if (connection->data.socket) {
1898 if (cork)
1899 drbd_tcp_cork(connection->data.socket);
1900 else if (!uncork)
1901 drbd_tcp_uncork(connection->data.socket);
1902 }
1903 mutex_unlock(&connection->data.mutex);
1904}
1905
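/* Main loop of the per-connection worker thread: dequeue work items and run
 * their callbacks; once told to stop, drain the remaining work with the
 * cancel flag set and clean up all attached devices. */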
Philipp Reisnerb411b362009-09-25 16:07:19 -07001906int drbd_worker(struct drbd_thread *thi)
1907{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001908 struct drbd_connection *connection = thi->connection;
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02001909 struct drbd_work *w = NULL;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001910 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001911 LIST_HEAD(work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001912 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001913
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001914 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01001915 drbd_thread_current_set_cpu(thi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001916
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001917 /* as long as we use drbd_queue_work_front(),
1918 * we may only dequeue single work items here, not batches. */
1919 if (list_empty(&work_list))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001920 wait_for_work(connection, &work_list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001921
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001922 if (signal_pending(current)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001923 flush_signals(current);
Philipp Reisner19393e12011-02-09 10:09:07 +01001924 if (get_t_state(thi) == RUNNING) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001925 drbd_warn(connection, "Worker got an unexpected signal\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001926 continue;
Philipp Reisner19393e12011-02-09 10:09:07 +01001927 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001928 break;
1929 }
1930
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001931 if (get_t_state(thi) != RUNNING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001932 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001933
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001934 while (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02001935 w = list_first_entry(&work_list, struct drbd_work, list);
1936 list_del_init(&w->list);
1937 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001938 continue;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001939 if (connection->cstate >= C_WF_REPORT_PARAMS)
1940 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001941 }
1942 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001943
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001944 do {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001945 while (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02001946 w = list_first_entry(&work_list, struct drbd_work, list);
1947 list_del_init(&w->list);
1948 w->cb(w, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001949 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001950 dequeue_work_batch(&connection->sender_work, &work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001951 } while (!list_empty(&work_list));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001952
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001953 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001954 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1955 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001956 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001957 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001958 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001959 drbd_device_cleanup(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001960 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001961 rcu_read_lock();
Philipp Reisner0e29d162011-02-18 14:23:11 +01001962 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001963 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001964
1965 return 0;
1966}