blob: 6e01e62c58a02e4440863f6b907407b5ff30e8f9 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_worker.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +020024*/
Philipp Reisnerb411b362009-09-25 16:07:19 -070025
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070027#include <linux/drbd.h>
28#include <linux/sched.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070029#include <linux/wait.h>
30#include <linux/mm.h>
31#include <linux/memcontrol.h>
32#include <linux/mm_inline.h>
33#include <linux/slab.h>
34#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070035#include <linux/string.h>
36#include <linux/scatterlist.h>
37
38#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020039#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070040#include "drbd_req.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070041
/* Forward declarations: w_resync_timer() below dispatches to these two. */
static int make_ov_request(struct drbd_device *device, int cancel);
static int make_resync_request(struct drbd_device *device, int cancel);
Philipp Reisnerb411b362009-09-25 16:07:19 -070044
Andreas Gruenbacherc5a91612011-01-25 17:33:38 +010045/* endio handlers:
Andreas Gruenbachered15b792014-09-11 14:29:06 +020046 * drbd_md_endio (defined here)
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +010047 * drbd_request_endio (defined here)
48 * drbd_peer_request_endio (defined here)
Andreas Gruenbachered15b792014-09-11 14:29:06 +020049 * drbd_bm_endio (defined in drbd_bitmap.c)
Andreas Gruenbacherc5a91612011-01-25 17:33:38 +010050 *
Philipp Reisnerb411b362009-09-25 16:07:19 -070051 * For all these callbacks, note the following:
52 * The callbacks will be called in irq context by the IDE drivers,
53 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
54 * Try to get the locking right :)
55 *
56 */
57
58
59/* About the global_state_lock
60 Each state transition on an device holds a read lock. In case we have
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +020061 to evaluate the resync after dependencies, we grab a write lock, because
Philipp Reisnerb411b362009-09-25 16:07:19 -070062 we need stable states on all devices for that. */
63rwlock_t global_state_lock;
64
65/* used for synchronous meta data and bitmap IO
66 * submitted by drbd_md_sync_page_io()
67 */
Andreas Gruenbachered15b792014-09-11 14:29:06 +020068void drbd_md_endio(struct bio *bio, int error)
Philipp Reisnerb411b362009-09-25 16:07:19 -070069{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020070 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -070071
Lars Ellenberge37d2432014-04-01 23:53:30 +020072 device = bio->bi_private;
73 device->md_io.error = error;
Philipp Reisnerb411b362009-09-25 16:07:19 -070074
Philipp Reisner0cfac5d2011-11-10 12:12:52 +010075 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
76 * to timeout on the lower level device, and eventually detach from it.
77 * If this io completion runs after that timeout expired, this
78 * drbd_md_put_buffer() may allow us to finally try and re-attach.
79 * During normal operation, this only puts that extra reference
80 * down to 1 again.
81 * Make sure we first drop the reference, and only then signal
82 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
83 * next drbd_md_sync_page_io(), that we trigger the
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020084 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
Philipp Reisner0cfac5d2011-11-10 12:12:52 +010085 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020086 drbd_md_put_buffer(device);
Lars Ellenberge37d2432014-04-01 23:53:30 +020087 device->md_io.done = 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020088 wake_up(&device->misc_wait);
Philipp Reisnercdfda632011-07-05 15:38:59 +020089 bio_put(bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020090 if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
91 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -070092}
93
94/* reads on behalf of the partner,
95 * "submitted" by the receiver
96 */
Rashika Kheriaa186e472013-12-19 15:06:10 +053097static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -070098{
99 unsigned long flags = 0;
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200100 struct drbd_peer_device *peer_device = peer_req->peer_device;
101 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700102
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200103 spin_lock_irqsave(&device->resource->req_lock, flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200104 device->read_cnt += peer_req->i.size >> 9;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200105 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200106 if (list_empty(&device->read_ee))
107 wake_up(&device->ee_wait);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100108 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200109 __drbd_chk_io_error(device, DRBD_READ_ERROR);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200110 spin_unlock_irqrestore(&device->resource->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700111
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200112 drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200113 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700114}
115
116/* writes on behalf of the partner, or resync writes,
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200117 * "submitted" by the receiver, final stage. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200118void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700119{
120 unsigned long flags = 0;
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200121 struct drbd_peer_device *peer_device = peer_req->peer_device;
122 struct drbd_device *device = peer_device->device;
Lars Ellenberg181286a2011-03-31 15:18:56 +0200123 struct drbd_interval i;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700124 int do_wake;
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +0100125 u64 block_id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700126 int do_al_complete_io;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700127
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100128 /* after we moved peer_req to done_ee,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700129 * we may no longer access it,
130 * it may be freed/reused already!
131 * (as soon as we release the req_lock) */
Lars Ellenberg181286a2011-03-31 15:18:56 +0200132 i = peer_req->i;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100133 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
134 block_id = peer_req->block_id;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +0200135 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700136
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200137 spin_lock_irqsave(&device->resource->req_lock, flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200138 device->writ_cnt += peer_req->i.size >> 9;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200139 list_move_tail(&peer_req->w.list, &device->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700140
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +0100141 /*
Andreas Gruenbacher5e472262011-01-27 14:42:51 +0100142 * Do not remove from the write_requests tree here: we did not send the
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +0100143 * Ack yet and did not wake possibly waiting conflicting requests.
144 * Removed from the tree from "drbd_process_done_ee" within the
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200145 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +0100146 * _drbd_clear_done_ee.
147 */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700148
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200149 do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700150
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200151 /* FIXME do we want to detach for failed REQ_DISCARD?
152 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
153 if (peer_req->flags & EE_WAS_ERROR)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200154 __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200155 spin_unlock_irqrestore(&device->resource->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700156
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +0100157 if (block_id == ID_SYNCER)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200158 drbd_rs_complete_io(device, i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700159
160 if (do_wake)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200161 wake_up(&device->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700162
163 if (do_al_complete_io)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200164 drbd_al_complete_io(device, &i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700165
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200166 wake_asender(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200167 put_ldev(device);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200168}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700169
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200170/* writes on behalf of the partner, or resync writes,
171 * "submitted" by the receiver.
172 */
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +0100173void drbd_peer_request_endio(struct bio *bio, int error)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200174{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100175 struct drbd_peer_request *peer_req = bio->bi_private;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200176 struct drbd_device *device = peer_req->peer_device->device;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200177 int uptodate = bio_flagged(bio, BIO_UPTODATE);
178 int is_write = bio_data_dir(bio) == WRITE;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200179 int is_discard = !!(bio->bi_rw & REQ_DISCARD);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200180
Lars Ellenberg07194272010-12-20 15:38:07 +0100181 if (error && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200182 drbd_warn(device, "%s: error=%d s=%llus\n",
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200183 is_write ? (is_discard ? "discard" : "write")
184 : "read", error,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100185 (unsigned long long)peer_req->i.sector);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200186 if (!error && !uptodate) {
Lars Ellenberg07194272010-12-20 15:38:07 +0100187 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200188 drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
Lars Ellenberg07194272010-12-20 15:38:07 +0100189 is_write ? "write" : "read",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100190 (unsigned long long)peer_req->i.sector);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200191 /* strange behavior of some lower level drivers...
192 * fail the request by clearing the uptodate flag,
193 * but do not return any error?! */
194 error = -EIO;
195 }
196
197 if (error)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100198 set_bit(__EE_WAS_ERROR, &peer_req->flags);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200199
200 bio_put(bio); /* no need for the bio anymore */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100201 if (atomic_dec_and_test(&peer_req->pending_bios)) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200202 if (is_write)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100203 drbd_endio_write_sec_final(peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200204 else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100205 drbd_endio_read_sec_final(peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200206 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700207}
208
209/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
210 */
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +0100211void drbd_request_endio(struct bio *bio, int error)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700212{
Lars Ellenberga1154132010-11-13 20:42:29 +0100213 unsigned long flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700214 struct drbd_request *req = bio->bi_private;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200215 struct drbd_device *device = req->device;
Lars Ellenberga1154132010-11-13 20:42:29 +0100216 struct bio_and_error m;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700217 enum drbd_req_event what;
218 int uptodate = bio_flagged(bio, BIO_UPTODATE);
219
Philipp Reisnerb411b362009-09-25 16:07:19 -0700220 if (!error && !uptodate) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200221 drbd_warn(device, "p %s: setting error to -EIO\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -0700222 bio_data_dir(bio) == WRITE ? "write" : "read");
223 /* strange behavior of some lower level drivers...
224 * fail the request by clearing the uptodate flag,
225 * but do not return any error?! */
226 error = -EIO;
227 }
228
Philipp Reisner1b6dd252012-09-04 15:16:20 +0200229
230 /* If this request was aborted locally before,
231 * but now was completed "successfully",
232 * chances are that this caused arbitrary data corruption.
233 *
234 * "aborting" requests, or force-detaching the disk, is intended for
235 * completely blocked/hung local backing devices which do no longer
236 * complete requests at all, not even do error completions. In this
237 * situation, usually a hard-reset and failover is the only way out.
238 *
239 * By "aborting", basically faking a local error-completion,
240 * we allow for a more graceful swichover by cleanly migrating services.
241 * Still the affected node has to be rebooted "soon".
242 *
243 * By completing these requests, we allow the upper layers to re-use
244 * the associated data pages.
245 *
246 * If later the local backing device "recovers", and now DMAs some data
247 * from disk into the original request pages, in the best case it will
248 * just put random data into unused pages; but typically it will corrupt
249 * meanwhile completely unrelated data, causing all sorts of damage.
250 *
251 * Which means delayed successful completion,
252 * especially for READ requests,
253 * is a reason to panic().
254 *
255 * We assume that a delayed *error* completion is OK,
256 * though we still will complain noisily about it.
257 */
258 if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
259 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200260 drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
Philipp Reisner1b6dd252012-09-04 15:16:20 +0200261
262 if (!error)
263 panic("possible random memory corruption caused by delayed completion of aborted local request\n");
264 }
265
Philipp Reisnerb411b362009-09-25 16:07:19 -0700266 /* to avoid recursion in __req_mod */
267 if (unlikely(error)) {
Lars Ellenberg2f632ae2014-04-28 18:43:24 +0200268 if (bio->bi_rw & REQ_DISCARD)
269 what = (error == -EOPNOTSUPP)
270 ? DISCARD_COMPLETED_NOTSUPP
271 : DISCARD_COMPLETED_WITH_ERROR;
272 else
273 what = (bio_data_dir(bio) == WRITE)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100274 ? WRITE_COMPLETED_WITH_ERROR
Lars Ellenberg5c3c7e62010-04-10 02:10:09 +0200275 : (bio_rw(bio) == READ)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100276 ? READ_COMPLETED_WITH_ERROR
277 : READ_AHEAD_COMPLETED_WITH_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700278 } else
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100279 what = COMPLETED_OK;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700280
281 bio_put(req->private_bio);
282 req->private_bio = ERR_PTR(error);
283
Lars Ellenberga1154132010-11-13 20:42:29 +0100284 /* not req_mod(), we need irqsave here! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200285 spin_lock_irqsave(&device->resource->req_lock, flags);
Lars Ellenberga1154132010-11-13 20:42:29 +0100286 __req_mod(req, what, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200287 spin_unlock_irqrestore(&device->resource->req_lock, flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200288 put_ldev(device);
Lars Ellenberga1154132010-11-13 20:42:29 +0100289
290 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200291 complete_master_bio(device, &m);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700292}
293
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +0200294void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200295{
296 struct hash_desc desc;
297 struct scatterlist sg;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100298 struct page *page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200299 struct page *tmp;
300 unsigned len;
301
302 desc.tfm = tfm;
303 desc.flags = 0;
304
305 sg_init_table(&sg, 1);
306 crypto_hash_init(&desc);
307
308 while ((tmp = page_chain_next(page))) {
309 /* all but the last page will be fully used */
310 sg_set_page(&sg, page, PAGE_SIZE, 0);
311 crypto_hash_update(&desc, &sg, sg.length);
312 page = tmp;
313 }
314 /* and now the last, possibly only partially used page */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100315 len = peer_req->i.size & (PAGE_SIZE - 1);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200316 sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
317 crypto_hash_update(&desc, &sg, sg.length);
318 crypto_hash_final(&desc, digest);
319}
320
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +0200321void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700322{
323 struct hash_desc desc;
324 struct scatterlist sg;
Kent Overstreet79886132013-11-23 17:19:00 -0800325 struct bio_vec bvec;
326 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700327
328 desc.tfm = tfm;
329 desc.flags = 0;
330
331 sg_init_table(&sg, 1);
332 crypto_hash_init(&desc);
333
Kent Overstreet79886132013-11-23 17:19:00 -0800334 bio_for_each_segment(bvec, bio, iter) {
335 sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700336 crypto_hash_update(&desc, &sg, sg.length);
337 }
338 crypto_hash_final(&desc, digest);
339}
340
Lars Ellenberg9676c762011-02-22 14:02:31 +0100341/* MAYBE merge common code with w_e_end_ov_req */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100342static int w_e_send_csum(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700343{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200344 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200345 struct drbd_peer_device *peer_device = peer_req->peer_device;
346 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700347 int digest_size;
348 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100349 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700350
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100351 if (unlikely(cancel))
352 goto out;
353
Lars Ellenberg9676c762011-02-22 14:02:31 +0100354 if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100355 goto out;
356
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200357 digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100358 digest = kmalloc(digest_size, GFP_NOIO);
359 if (digest) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100360 sector_t sector = peer_req->i.sector;
361 unsigned int size = peer_req->i.size;
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200362 drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
Lars Ellenberg9676c762011-02-22 14:02:31 +0100363 /* Free peer_req and pages before send.
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100364 * In case we block on congestion, we could otherwise run into
365 * some distributed deadlock, if the other side blocks on
366 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200367 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200368 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100369 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200370 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200371 err = drbd_send_drequest_csum(peer_device, sector, size,
Andreas Gruenbacherdb1b0b72011-03-16 01:37:21 +0100372 digest, digest_size,
373 P_CSUM_RS_REQUEST);
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100374 kfree(digest);
375 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200376 drbd_err(device, "kmalloc() of digest failed.\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100377 err = -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700378 }
379
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100380out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100381 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200382 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700383
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100384 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200385 drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100386 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700387}
388
389#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
390
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200391static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700392{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200393 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100394 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700395
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200396 if (!get_ldev(device))
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200397 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700398
399 /* GFP_TRY, because if there is no memory available right now, this may
400 * be rescheduled for later. It is "only" background resync, after all. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200401 peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200402 size, true /* has real payload */, GFP_TRY);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100403 if (!peer_req)
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200404 goto defer;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700405
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200406 peer_req->w.cb = w_e_send_csum;
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200407 spin_lock_irq(&device->resource->req_lock);
Lars Ellenbergb9ed7082014-04-23 12:15:35 +0200408 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200409 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700410
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200411 atomic_add(size >> 9, &device->rs_sect_ev);
412 if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200413 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700414
Lars Ellenberg10f6d9922011-01-24 14:47:09 +0100415 /* If it failed because of ENOMEM, retry should help. If it failed
416 * because bio_add_page failed (probably broken lower level driver),
417 * retry may or may not help.
418 * If it does not, you may need to force disconnect. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200419 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200420 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200421 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +0200422
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200423 drbd_free_peer_req(device, peer_req);
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200424defer:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200425 put_ldev(device);
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200426 return -EAGAIN;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700427}
428
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100429int w_resync_timer(struct drbd_work *w, int cancel)
Philipp Reisner794abb72010-12-27 11:51:23 +0100430{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200431 struct drbd_device *device =
432 container_of(w, struct drbd_device, resync_work);
433
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200434 switch (device->state.conn) {
Philipp Reisner794abb72010-12-27 11:51:23 +0100435 case C_VERIFY_S:
Andreas Gruenbacherd448a2e2011-08-25 16:59:58 +0200436 make_ov_request(device, cancel);
Philipp Reisner794abb72010-12-27 11:51:23 +0100437 break;
438 case C_SYNC_TARGET:
Andreas Gruenbacherd448a2e2011-08-25 16:59:58 +0200439 make_resync_request(device, cancel);
Philipp Reisner794abb72010-12-27 11:51:23 +0100440 break;
441 }
442
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100443 return 0;
Philipp Reisner794abb72010-12-27 11:51:23 +0100444}
445
Philipp Reisnerb411b362009-09-25 16:07:19 -0700446void resync_timer_fn(unsigned long data)
447{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200448 struct drbd_device *device = (struct drbd_device *) data;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700449
Lars Ellenberg15e26f62014-04-28 11:43:21 +0200450 drbd_queue_work_if_unqueued(
451 &first_peer_device(device)->connection->sender_work,
452 &device->resync_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700453}
454
Philipp Reisner778f2712010-07-06 11:14:00 +0200455static void fifo_set(struct fifo_buffer *fb, int value)
456{
457 int i;
458
459 for (i = 0; i < fb->size; i++)
Philipp Reisnerf10f2622010-10-05 16:50:17 +0200460 fb->values[i] = value;
Philipp Reisner778f2712010-07-06 11:14:00 +0200461}
462
463static int fifo_push(struct fifo_buffer *fb, int value)
464{
465 int ov;
466
467 ov = fb->values[fb->head_index];
468 fb->values[fb->head_index++] = value;
469
470 if (fb->head_index >= fb->size)
471 fb->head_index = 0;
472
473 return ov;
474}
475
476static void fifo_add_val(struct fifo_buffer *fb, int value)
477{
478 int i;
479
480 for (i = 0; i < fb->size; i++)
481 fb->values[i] += value;
482}
483
Philipp Reisner9958c852011-05-03 16:19:31 +0200484struct fifo_buffer *fifo_alloc(int fifo_size)
485{
486 struct fifo_buffer *fb;
487
Lars Ellenberg8747d302012-09-26 14:22:40 +0200488 fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
Philipp Reisner9958c852011-05-03 16:19:31 +0200489 if (!fb)
490 return NULL;
491
492 fb->head_index = 0;
493 fb->size = fifo_size;
494 fb->total = 0;
495
496 return fb;
497}
498
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200499static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
Philipp Reisner778f2712010-07-06 11:14:00 +0200500{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +0200501 struct disk_conf *dc;
Lars Ellenberg7f34f612014-04-22 16:37:16 +0200502 unsigned int want; /* The number of sectors we want in-flight */
Philipp Reisner778f2712010-07-06 11:14:00 +0200503 int req_sect; /* Number of sectors to request in this turn */
Lars Ellenberg7f34f612014-04-22 16:37:16 +0200504 int correction; /* Number of sectors more we need in-flight */
Philipp Reisner778f2712010-07-06 11:14:00 +0200505 int cps; /* correction per invocation of drbd_rs_controller() */
506 int steps; /* Number of time steps to plan ahead */
507 int curr_corr;
508 int max_sect;
Philipp Reisner813472c2011-05-03 16:47:02 +0200509 struct fifo_buffer *plan;
Philipp Reisner778f2712010-07-06 11:14:00 +0200510
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200511 dc = rcu_dereference(device->ldev->disk_conf);
512 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner778f2712010-07-06 11:14:00 +0200513
Philipp Reisner813472c2011-05-03 16:47:02 +0200514 steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
Philipp Reisner778f2712010-07-06 11:14:00 +0200515
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200516 if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +0200517 want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
Philipp Reisner778f2712010-07-06 11:14:00 +0200518 } else { /* normal path */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +0200519 want = dc->c_fill_target ? dc->c_fill_target :
520 sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
Philipp Reisner778f2712010-07-06 11:14:00 +0200521 }
522
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200523 correction = want - device->rs_in_flight - plan->total;
Philipp Reisner778f2712010-07-06 11:14:00 +0200524
525 /* Plan ahead */
526 cps = correction / steps;
Philipp Reisner813472c2011-05-03 16:47:02 +0200527 fifo_add_val(plan, cps);
528 plan->total += cps * steps;
Philipp Reisner778f2712010-07-06 11:14:00 +0200529
530 /* What we do in this step */
Philipp Reisner813472c2011-05-03 16:47:02 +0200531 curr_corr = fifo_push(plan, 0);
532 plan->total -= curr_corr;
Philipp Reisner778f2712010-07-06 11:14:00 +0200533
534 req_sect = sect_in + curr_corr;
535 if (req_sect < 0)
536 req_sect = 0;
537
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +0200538 max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
Philipp Reisner778f2712010-07-06 11:14:00 +0200539 if (req_sect > max_sect)
540 req_sect = max_sect;
541
542 /*
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200543 drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200544 sect_in, device->rs_in_flight, want, correction,
545 steps, cps, device->rs_planed, curr_corr, req_sect);
Philipp Reisner778f2712010-07-06 11:14:00 +0200546 */
547
548 return req_sect;
549}
550
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200551static int drbd_rs_number_requests(struct drbd_device *device)
Lars Ellenberge65f4402010-11-05 10:04:07 +0100552{
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200553 unsigned int sect_in; /* Number of sectors that came in since the last turn */
554 int number, mxb;
555
556 sect_in = atomic_xchg(&device->rs_sect_in, 0);
557 device->rs_in_flight -= sect_in;
Philipp Reisner813472c2011-05-03 16:47:02 +0200558
559 rcu_read_lock();
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200560 mxb = drbd_get_max_buffers(device) / 2;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200561 if (rcu_dereference(device->rs_plan_s)->size) {
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200562 number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200563 device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
Lars Ellenberge65f4402010-11-05 10:04:07 +0100564 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200565 device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
566 number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
Lars Ellenberge65f4402010-11-05 10:04:07 +0100567 }
Philipp Reisner813472c2011-05-03 16:47:02 +0200568 rcu_read_unlock();
Lars Ellenberge65f4402010-11-05 10:04:07 +0100569
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200570 /* Don't have more than "max-buffers"/2 in-flight.
571 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
572 * potentially causing a distributed deadlock on congestion during
573 * online-verify or (checksum-based) resync, if max-buffers,
574 * socket buffer sizes and resync rate settings are mis-configured. */
Lars Ellenberg7f34f612014-04-22 16:37:16 +0200575
576 /* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
577 * mxb (as used here, and in drbd_alloc_pages on the peer) is
578 * "number of pages" (typically also 4k),
579 * but "rs_in_flight" is in "sectors" (512 Byte). */
580 if (mxb - device->rs_in_flight/8 < number)
581 number = mxb - device->rs_in_flight/8;
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200582
Lars Ellenberge65f4402010-11-05 10:04:07 +0100583 return number;
584}
585
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100586static int make_resync_request(struct drbd_device *const device, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700587{
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100588 struct drbd_peer_device *const peer_device = first_peer_device(device);
589 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700590 unsigned long bit;
591 sector_t sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200592 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100593 int max_bio_size;
Lars Ellenberge65f4402010-11-05 10:04:07 +0100594 int number, rollback_i, size;
Lars Ellenberg506afb62014-01-31 14:55:12 +0100595 int align, requeue = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +0200596 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700597
598 if (unlikely(cancel))
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100599 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700600
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200601 if (device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200602 /* empty resync? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200603 drbd_resync_finished(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100604 return 0;
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200605 }
606
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200607 if (!get_ldev(device)) {
608 /* Since we only need to access device->rsync a
609 get_ldev_if_state(device,D_FAILED) would be sufficient, but
Philipp Reisnerb411b362009-09-25 16:07:19 -0700610 to continue resync with a broken disk makes no sense at
611 all */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200612 drbd_err(device, "Disk broke down during resync!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100613 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700614 }
615
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200616 max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
617 number = drbd_rs_number_requests(device);
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200618 if (number <= 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +0200619 goto requeue;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700620
Philipp Reisnerb411b362009-09-25 16:07:19 -0700621 for (i = 0; i < number; i++) {
Lars Ellenberg506afb62014-01-31 14:55:12 +0100622 /* Stop generating RS requests when half of the send buffer is filled,
623 * but notify TCP that we'd like to have more space. */
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100624 mutex_lock(&connection->data.mutex);
625 if (connection->data.socket) {
Lars Ellenberg506afb62014-01-31 14:55:12 +0100626 struct sock *sk = connection->data.socket->sk;
627 int queued = sk->sk_wmem_queued;
628 int sndbuf = sk->sk_sndbuf;
629 if (queued > sndbuf / 2) {
630 requeue = 1;
631 if (sk->sk_socket)
632 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
633 }
634 } else
635 requeue = 1;
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100636 mutex_unlock(&connection->data.mutex);
Lars Ellenberg506afb62014-01-31 14:55:12 +0100637 if (requeue)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700638 goto requeue;
639
640next_sector:
641 size = BM_BLOCK_SIZE;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200642 bit = drbd_bm_find_next(device, device->bm_resync_fo);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700643
Lars Ellenberg4b0715f2010-12-14 15:13:04 +0100644 if (bit == DRBD_END_OF_BITMAP) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200645 device->bm_resync_fo = drbd_bm_bits(device);
646 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100647 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700648 }
649
650 sector = BM_BIT_TO_SECT(bit);
651
Lars Ellenbergad3fee72013-12-20 11:22:13 +0100652 if (drbd_try_rs_begin_io(device, sector)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200653 device->bm_resync_fo = bit;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700654 goto requeue;
655 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200656 device->bm_resync_fo = bit + 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700657
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200658 if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
659 drbd_rs_complete_io(device, sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700660 goto next_sector;
661 }
662
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100663#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
Philipp Reisnerb411b362009-09-25 16:07:19 -0700664 /* try to find some adjacent bits.
665 * we stop if we have already the maximum req size.
666 *
667 * Additionally always align bigger requests, in order to
668 * be prepared for all stripe sizes of software RAIDs.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700669 */
670 align = 1;
Philipp Reisnerd2074502010-07-22 15:27:27 +0200671 rollback_i = i;
Lars Ellenberg6377b922014-04-28 18:43:17 +0200672 while (i < number) {
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100673 if (size + BM_BLOCK_SIZE > max_bio_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700674 break;
675
676 /* Be always aligned */
677 if (sector & ((1<<(align+3))-1))
678 break;
679
680 /* do not cross extent boundaries */
681 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
682 break;
683 /* now, is it actually dirty, after all?
684 * caution, drbd_bm_test_bit is tri-state for some
685 * obscure reason; ( b == 0 ) would get the out-of-band
686 * only accidentally right because of the "oddly sized"
687 * adjustment below */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200688 if (drbd_bm_test_bit(device, bit+1) != 1)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700689 break;
690 bit++;
691 size += BM_BLOCK_SIZE;
692 if ((BM_BLOCK_SIZE << align) <= size)
693 align++;
694 i++;
695 }
696 /* if we merged some,
697 * reset the offset to start the next drbd_bm_find_next from */
698 if (size > BM_BLOCK_SIZE)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200699 device->bm_resync_fo = bit + 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700700#endif
701
702 /* adjust very last sectors, in case we are oddly sized */
703 if (sector + (size>>9) > capacity)
704 size = (capacity-sector)<<9;
Lars Ellenbergaaaba342014-03-18 12:30:09 +0100705
706 if (device->use_csums) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100707 switch (read_for_csum(peer_device, sector, size)) {
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200708 case -EIO: /* Disk failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200709 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100710 return -EIO;
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200711 case -EAGAIN: /* allocation failed, or ldev busy */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200712 drbd_rs_complete_io(device, sector);
713 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerd2074502010-07-22 15:27:27 +0200714 i = rollback_i;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715 goto requeue;
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200716 case 0:
717 /* everything ok */
718 break;
719 default:
720 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700721 }
722 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100723 int err;
724
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200725 inc_rs_pending(device);
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100726 err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100727 sector, size, ID_SYNCER);
728 if (err) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200729 drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200730 dec_rs_pending(device);
731 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100732 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700733 }
734 }
735 }
736
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200737 if (device->bm_resync_fo >= drbd_bm_bits(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738 /* last syncer _request_ was sent,
739 * but the P_RS_DATA_REPLY not yet received. sync will end (and
740 * next sync group will resume), as soon as we receive the last
741 * resync data block, and the last bit is cleared.
742 * until then resync "work" is "inactive" ...
743 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200744 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100745 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700746 }
747
748 requeue:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200749 device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
750 mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
751 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100752 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700753}
754
Andreas Gruenbacherd448a2e2011-08-25 16:59:58 +0200755static int make_ov_request(struct drbd_device *device, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700756{
757 int number, i, size;
758 sector_t sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200759 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200760 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700761
762 if (unlikely(cancel))
763 return 1;
764
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200765 number = drbd_rs_number_requests(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700766
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200767 sector = device->ov_position;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700768 for (i = 0; i < number; i++) {
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200769 if (sector >= capacity)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700770 return 1;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200771
772 /* We check for "finished" only in the reply path:
773 * w_e_end_ov_reply().
774 * We need to send at least one request out. */
775 stop_sector_reached = i > 0
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200776 && verify_can_do_stop_sector(device)
777 && sector >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200778 if (stop_sector_reached)
779 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700780
781 size = BM_BLOCK_SIZE;
782
Lars Ellenbergad3fee72013-12-20 11:22:13 +0100783 if (drbd_try_rs_begin_io(device, sector)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200784 device->ov_position = sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700785 goto requeue;
786 }
787
788 if (sector + (size>>9) > capacity)
789 size = (capacity-sector)<<9;
790
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200791 inc_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200792 if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200793 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700794 return 0;
795 }
796 sector += BM_SECT_PER_BIT;
797 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200798 device->ov_position = sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700799
800 requeue:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200801 device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200802 if (i == 0 || !stop_sector_reached)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200803 mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700804 return 1;
805}
806
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100807int w_ov_finished(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700808{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200809 struct drbd_device_work *dw =
810 container_of(w, struct drbd_device_work, w);
811 struct drbd_device *device = dw->device;
812 kfree(dw);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200813 ov_out_of_sync_print(device);
814 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700815
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100816 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700817}
818
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100819static int w_resync_finished(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700820{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200821 struct drbd_device_work *dw =
822 container_of(w, struct drbd_device_work, w);
823 struct drbd_device *device = dw->device;
824 kfree(dw);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700825
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200826 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700827
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100828 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700829}
830
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200831static void ping_peer(struct drbd_device *device)
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200832{
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200833 struct drbd_connection *connection = first_peer_device(device)->connection;
Philipp Reisner2a67d8b2011-02-09 14:10:32 +0100834
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200835 clear_bit(GOT_PING_ACK, &connection->flags);
836 request_ping(connection);
837 wait_event(connection->ping_wait,
838 test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200839}
840
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200841int drbd_resync_finished(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700842{
843 unsigned long db, dt, dbdt;
844 unsigned long n_oos;
845 union drbd_state os, ns;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200846 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700847 char *khelper_cmd = NULL;
Lars Ellenberg26525612010-11-05 09:56:33 +0100848 int verify_done = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700849
850 /* Remove all elements from the resync LRU. Since future actions
851 * might set bits in the (main) bitmap, then the entries in the
852 * resync LRU would be wrong. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200853 if (drbd_rs_del_all(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700854 /* In case this is not possible now, most probably because
855 * there are P_RS_DATA_REPLY Packets lingering on the worker's
856 * queue (or even the read operations for those packets
857 * is not finished by now). Retry in 100ms. */
858
Philipp Reisner20ee6392011-01-18 15:28:59 +0100859 schedule_timeout_interruptible(HZ / 10);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200860 dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
861 if (dw) {
862 dw->w.cb = w_resync_finished;
863 dw->device = device;
864 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
865 &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700866 return 1;
867 }
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200868 drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700869 }
870
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200871 dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700872 if (dt <= 0)
873 dt = 1;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200874
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200875 db = device->rs_total;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200876 /* adjust for verify start and stop sectors, respective reached position */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200877 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
878 db -= device->ov_left;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200879
Philipp Reisnerb411b362009-09-25 16:07:19 -0700880 dbdt = Bit2KB(db/dt);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200881 device->rs_paused /= HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700882
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200883 if (!get_ldev(device))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700884 goto out;
885
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200886 ping_peer(device);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200887
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200888 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200889 os = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700890
Lars Ellenberg26525612010-11-05 09:56:33 +0100891 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
892
Philipp Reisnerb411b362009-09-25 16:07:19 -0700893 /* This protects us against multiple calls (that can happen in the presence
894 of application IO), and against connectivity loss just before we arrive here. */
895 if (os.conn <= C_CONNECTED)
896 goto out_unlock;
897
898 ns = os;
899 ns.conn = C_CONNECTED;
900
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200901 drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200902 verify_done ? "Online verify" : "Resync",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200903 dt + device->rs_paused, device->rs_paused, dbdt);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700904
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200905 n_oos = drbd_bm_total_weight(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700906
907 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
908 if (n_oos) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200909 drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -0700910 n_oos, Bit2KB(1));
911 khelper_cmd = "out-of-sync";
912 }
913 } else {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200914 D_ASSERT(device, (n_oos - device->rs_failed) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700915
916 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
917 khelper_cmd = "after-resync-target";
918
Lars Ellenbergaaaba342014-03-18 12:30:09 +0100919 if (device->use_csums && device->rs_total) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200920 const unsigned long s = device->rs_same_csum;
921 const unsigned long t = device->rs_total;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700922 const int ratio =
923 (t == 0) ? 0 :
924 (t < 100000) ? ((s*100)/t) : (s/(t/100));
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200925 drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
Philipp Reisnerb411b362009-09-25 16:07:19 -0700926 "transferred %luK total %luK\n",
927 ratio,
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200928 Bit2KB(device->rs_same_csum),
929 Bit2KB(device->rs_total - device->rs_same_csum),
930 Bit2KB(device->rs_total));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700931 }
932 }
933
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200934 if (device->rs_failed) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200935 drbd_info(device, " %lu failed blocks\n", device->rs_failed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700936
937 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
938 ns.disk = D_INCONSISTENT;
939 ns.pdsk = D_UP_TO_DATE;
940 } else {
941 ns.disk = D_UP_TO_DATE;
942 ns.pdsk = D_INCONSISTENT;
943 }
944 } else {
945 ns.disk = D_UP_TO_DATE;
946 ns.pdsk = D_UP_TO_DATE;
947
948 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200949 if (device->p_uuid) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700950 int i;
951 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200952 _drbd_uuid_set(device, i, device->p_uuid[i]);
953 drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
954 _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700955 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200956 drbd_err(device, "device->p_uuid is NULL! BUG\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700957 }
958 }
959
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100960 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
961 /* for verify runs, we don't update uuids here,
962 * so there would be nothing to report. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200963 drbd_uuid_set_bm(device, 0UL);
964 drbd_print_uuids(device, "updated UUIDs");
965 if (device->p_uuid) {
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100966 /* Now the two UUID sets are equal, update what we
967 * know of the peer. */
968 int i;
969 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200970 device->p_uuid[i] = device->ldev->md.uuid[i];
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100971 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700972 }
973 }
974
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200975 _drbd_set_state(device, ns, CS_VERBOSE, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700976out_unlock:
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200977 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200978 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700979out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200980 device->rs_total = 0;
981 device->rs_failed = 0;
982 device->rs_paused = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200983
984 /* reset start sector, if we reached end of device */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200985 if (verify_done && device->ov_left == 0)
986 device->ov_start_sector = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200988 drbd_md_sync(device);
Lars Ellenberg13d42682010-10-13 17:37:54 +0200989
Philipp Reisnerb411b362009-09-25 16:07:19 -0700990 if (khelper_cmd)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200991 drbd_khelper(device, khelper_cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700992
993 return 1;
994}
995
996/* helper */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200997static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700998{
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200999 if (drbd_peer_req_has_active_page(peer_req)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001000 /* This might happen if sendpage() has not finished */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001001 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001002 atomic_add(i, &device->pp_in_use_by_net);
1003 atomic_sub(i, &device->pp_in_use);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001004 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001005 list_add_tail(&peer_req->w.list, &device->net_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001006 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001007 wake_up(&drbd_pp_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001008 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001009 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001010}
1011
1012/**
1013 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001014 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001015 * @w: work object.
1016 * @cancel: The connection will be closed anyways
1017 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001018int w_e_end_data_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001019{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001020 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001021 struct drbd_peer_device *peer_device = peer_req->peer_device;
1022 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001023 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001024
1025 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001026 drbd_free_peer_req(device, peer_req);
1027 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001028 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001029 }
1030
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001031 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001032 err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001033 } else {
1034 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001035 drbd_err(device, "Sending NegDReply. sector=%llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001036 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001037
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001038 err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001039 }
1040
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001041 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001042
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001043 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001044
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001045 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001046 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001047 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001048}
1049
1050/**
Andreas Gruenbachera209b4a2011-08-17 12:43:25 +02001051 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
Philipp Reisnerb411b362009-09-25 16:07:19 -07001052 * @w: work object.
1053 * @cancel: The connection will be closed anyways
1054 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001055int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001056{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001057 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001058 struct drbd_peer_device *peer_device = peer_req->peer_device;
1059 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001060 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001061
1062 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001063 drbd_free_peer_req(device, peer_req);
1064 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001065 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001066 }
1067
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001068 if (get_ldev_if_state(device, D_FAILED)) {
1069 drbd_rs_complete_io(device, peer_req->i.sector);
1070 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001071 }
1072
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001073 if (device->state.conn == C_AHEAD) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001074 err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001075 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001076 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1077 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001078 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001079 } else {
1080 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001081 drbd_err(device, "Not sending RSDataReply, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001082 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001083 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001084 }
1085 } else {
1086 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001087 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001088 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001089
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001090 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091
1092 /* update resync data with failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001093 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001094 }
1095
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001096 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001097
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001098 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001099
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001100 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001101 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001102 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001103}
1104
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001105int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001106{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001107 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001108 struct drbd_peer_device *peer_device = peer_req->peer_device;
1109 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001110 struct digest_info *di;
1111 int digest_size;
1112 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001113 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001114
1115 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001116 drbd_free_peer_req(device, peer_req);
1117 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001118 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001119 }
1120
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001121 if (get_ldev(device)) {
1122 drbd_rs_complete_io(device, peer_req->i.sector);
1123 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001124 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001125
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001126 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001127
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001128 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001129 /* quick hack to try to avoid a race against reconfiguration.
1130 * a real fix would be much more involved,
1131 * introducing more locking mechanisms */
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001132 if (peer_device->connection->csums_tfm) {
1133 digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001134 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001135 digest = kmalloc(digest_size, GFP_NOIO);
1136 }
1137 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001138 drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001139 eq = !memcmp(digest, di->digest, digest_size);
1140 kfree(digest);
1141 }
1142
1143 if (eq) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001144 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001145 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001146 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001147 err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001148 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001149 inc_rs_pending(device);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001150 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1151 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001152 kfree(di);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001153 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001154 }
1155 } else {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001156 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001157 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001158 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001159 }
1160
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001161 dec_unacked(device);
1162 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001163
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001164 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001165 drbd_err(device, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001166 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001167}
1168
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001169int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001170{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001171 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001172 struct drbd_peer_device *peer_device = peer_req->peer_device;
1173 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001174 sector_t sector = peer_req->i.sector;
1175 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001176 int digest_size;
1177 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001178 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179
1180 if (unlikely(cancel))
1181 goto out;
1182
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001183 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001184 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001185 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001186 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001187 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001188 }
1189
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001190 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001191 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001192 else
1193 memset(digest, 0, digest_size);
1194
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001195 /* Free e and pages before send.
1196 * In case we block on congestion, we could otherwise run into
1197 * some distributed deadlock, if the other side blocks on
1198 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001199 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001200 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001201 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001202 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001203 err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001204 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001205 dec_rs_pending(device);
Philipp Reisner8f214202011-03-01 15:52:35 +01001206 kfree(digest);
1207
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001209 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001210 drbd_free_peer_req(device, peer_req);
1211 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001212 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001213}
1214
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001215void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001216{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001217 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1218 device->ov_last_oos_size += size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001220 device->ov_last_oos_start = sector;
1221 device->ov_last_oos_size = size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001223 drbd_set_out_of_sync(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001224}
1225
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001226int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001227{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001228 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001229 struct drbd_peer_device *peer_device = peer_req->peer_device;
1230 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001231 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001233 sector_t sector = peer_req->i.sector;
1234 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001235 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001236 int err, eq = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001237 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238
1239 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001240 drbd_free_peer_req(device, peer_req);
1241 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001242 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001243 }
1244
1245 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1246 * the resync lru has been cleaned up already */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001247 if (get_ldev(device)) {
1248 drbd_rs_complete_io(device, peer_req->i.sector);
1249 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001250 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001251
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001252 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001253
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001254 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001255 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001256 digest = kmalloc(digest_size, GFP_NOIO);
1257 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001258 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001259
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001260 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001261 eq = !memcmp(digest, di->digest, digest_size);
1262 kfree(digest);
1263 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001264 }
1265
Lars Ellenberg9676c762011-02-22 14:02:31 +01001266 /* Free peer_req and pages before send.
1267 * In case we block on congestion, we could otherwise run into
1268 * some distributed deadlock, if the other side blocks on
1269 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001270 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001271 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001272 if (!eq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001273 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001274 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001275 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001276
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001277 err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001278 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001279
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001280 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001281
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001282 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001283
1284 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001285 if ((device->ov_left & 0x200) == 0x200)
1286 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001287
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001288 stop_sector_reached = verify_can_do_stop_sector(device) &&
1289 (sector + (size>>9)) >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001290
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001291 if (device->ov_left == 0 || stop_sector_reached) {
1292 ov_out_of_sync_print(device);
1293 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001294 }
1295
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001296 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001297}
1298
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001299/* FIXME
1300 * We need to track the number of pending barrier acks,
1301 * and to be able to wait for them.
1302 * See also comment in drbd_adm_attach before drbd_suspend_io.
1303 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001304static int drbd_send_barrier(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001305{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001306 struct p_barrier *p;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001307 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001308
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001309 sock = &connection->data;
1310 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001311 if (!p)
1312 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001313 p->barrier = connection->send.current_epoch_nr;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001314 p->pad = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001315 connection->send.current_epoch_writes = 0;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001316
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001317 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001318}
1319
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001320int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001321{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001322 struct drbd_device *device =
1323 container_of(w, struct drbd_device, unplug_work);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001324 struct drbd_socket *sock;
1325
Philipp Reisnerb411b362009-09-25 16:07:19 -07001326 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001327 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001328 sock = &first_peer_device(device)->connection->data;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001329 if (!drbd_prepare_command(first_peer_device(device), sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001330 return -EIO;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001331 return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001332}
1333
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001334static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001335{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001336 if (!connection->send.seen_any_write_yet) {
1337 connection->send.seen_any_write_yet = true;
1338 connection->send.current_epoch_nr = epoch;
1339 connection->send.current_epoch_writes = 0;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001340 }
1341}
1342
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001343static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001344{
1345 /* re-init if first write on this connection */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001346 if (!connection->send.seen_any_write_yet)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001347 return;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001348 if (connection->send.current_epoch_nr != epoch) {
1349 if (connection->send.current_epoch_writes)
1350 drbd_send_barrier(connection);
1351 connection->send.current_epoch_nr = epoch;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001352 }
1353}
1354
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001355int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001356{
1357 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001358 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001359 struct drbd_peer_device *const peer_device = first_peer_device(device);
1360 struct drbd_connection *const connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001361 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001362
1363 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001364 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001365 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001366 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001367 req->pre_send_jif = jiffies;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001368
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001369 /* this time, no connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001370 * If it was sent, it was the closing barrier for the last
1371 * replicated epoch, before we went into AHEAD mode.
1372 * No more barriers will be sent, until we leave AHEAD mode again. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001373 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001374
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001375 err = drbd_send_out_of_sync(peer_device, req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001376 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001377
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001378 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001379}
1380
Philipp Reisnerb411b362009-09-25 16:07:19 -07001381/**
1382 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001383 * @w: work object.
1384 * @cancel: The connection will be closed anyways
1385 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001386int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387{
1388 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001389 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001390 struct drbd_peer_device *const peer_device = first_peer_device(device);
1391 struct drbd_connection *connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001392 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001393
1394 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001395 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001396 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001397 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001398 req->pre_send_jif = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001399
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001400 re_init_if_first_write(connection, req->epoch);
1401 maybe_send_barrier(connection, req->epoch);
1402 connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001403
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001404 err = drbd_send_dblock(peer_device, req);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001405 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001406
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001407 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408}
1409
1410/**
1411 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
Philipp Reisnerb411b362009-09-25 16:07:19 -07001412 * @w: work object.
1413 * @cancel: The connection will be closed anyways
1414 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001415int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001416{
1417 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001418 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001419 struct drbd_peer_device *const peer_device = first_peer_device(device);
1420 struct drbd_connection *connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001421 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001422
1423 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001424 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001425 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001426 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001427 req->pre_send_jif = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001428
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001429 /* Even read requests may close a write epoch,
1430 * if there was any yet. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001431 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001432
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001433 err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001434 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001435
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001436 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001438 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001439}
1440
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001441int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001442{
1443 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001444 struct drbd_device *device = req->device;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001445
Philipp Reisner07782862010-08-31 12:00:50 +02001446 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01001447 drbd_al_begin_io(device, &req->i);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001448
1449 drbd_req_make_private_bio(req, req->master_bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001450 req->private_bio->bi_bdev = device->ldev->backing_bdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001451 generic_make_request(req->private_bio);
1452
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001453 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001454}
1455
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001456static int _drbd_may_sync_now(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001458 struct drbd_device *odev = device;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001459 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001460
1461 while (1) {
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001462 if (!odev->ldev || odev->state.disk == D_DISKLESS)
Philipp Reisner438c8372011-03-28 14:48:01 +02001463 return 1;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001464 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001465 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001466 rcu_read_unlock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001467 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001468 return 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001469 odev = minor_to_device(resync_after);
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001470 if (!odev)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001471 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001472 if ((odev->state.conn >= C_SYNC_SOURCE &&
1473 odev->state.conn <= C_PAUSED_SYNC_T) ||
1474 odev->state.aftr_isp || odev->state.peer_isp ||
1475 odev->state.user_isp)
1476 return 0;
1477 }
1478}
1479
1480/**
1481 * _drbd_pause_after() - Pause resync on all devices that may not resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001482 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483 *
1484 * Called from process context only (admin command and after_state_ch).
1485 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001486static int _drbd_pause_after(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001487{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001488 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001489 int i, rv = 0;
1490
Philipp Reisner695d08f2011-04-11 22:53:32 -07001491 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001492 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001493 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1494 continue;
1495 if (!_drbd_may_sync_now(odev))
1496 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1497 != SS_NOTHING_TO_DO);
1498 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001499 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001500
1501 return rv;
1502}
1503
1504/**
1505 * _drbd_resume_next() - Resume resync on all devices that may resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001506 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001507 *
1508 * Called from process context only (admin command and worker).
1509 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001510static int _drbd_resume_next(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001512 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513 int i, rv = 0;
1514
Philipp Reisner695d08f2011-04-11 22:53:32 -07001515 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001516 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001517 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1518 continue;
1519 if (odev->state.aftr_isp) {
1520 if (_drbd_may_sync_now(odev))
1521 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1522 CS_HARD, NULL)
1523 != SS_NOTHING_TO_DO) ;
1524 }
1525 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001526 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001527 return rv;
1528}
1529
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001530void resume_next_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001531{
1532 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001533 _drbd_resume_next(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001534 write_unlock_irq(&global_state_lock);
1535}
1536
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001537void suspend_other_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001538{
1539 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001540 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001541 write_unlock_irq(&global_state_lock);
1542}
1543
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001544/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001545enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001547 struct drbd_device *odev;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001548 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549
1550 if (o_minor == -1)
1551 return NO_ERROR;
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001552 if (o_minor < -1 || o_minor > MINORMASK)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001553 return ERR_RESYNC_AFTER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001554
1555 /* check for loops */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001556 odev = minor_to_device(o_minor);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557 while (1) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001558 if (odev == device)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001559 return ERR_RESYNC_AFTER_CYCLE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001560
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001561 /* You are free to depend on diskless, non-existing,
1562 * or not yet/no longer existing minors.
1563 * We only reject dependency loops.
1564 * We cannot follow the dependency chain beyond a detached or
1565 * missing minor.
1566 */
1567 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1568 return NO_ERROR;
1569
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001570 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001571 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001572 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001573 /* dependency chain ends here, no cycles. */
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001574 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001575 return NO_ERROR;
1576
1577 /* follow the dependency chain */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001578 odev = minor_to_device(resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001579 }
1580}
1581
/*
 * Re-evaluate which devices have to pause and which may resume resync.
 * Both passes are repeated until neither of them reports a change.
 *
 * caller must hold global_state_lock
 */
void drbd_resync_after_changed(struct drbd_device *device)
{
	int paused, resumed;

	do {
		paused = _drbd_pause_after(device);
		resumed = _drbd_resume_next(device);
	} while (paused || resumed);
}
1592
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001593void drbd_rs_controller_reset(struct drbd_device *device)
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001594{
Philipp Reisner813472c2011-05-03 16:47:02 +02001595 struct fifo_buffer *plan;
1596
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001597 atomic_set(&device->rs_sect_in, 0);
1598 atomic_set(&device->rs_sect_ev, 0);
1599 device->rs_in_flight = 0;
Philipp Reisner813472c2011-05-03 16:47:02 +02001600
1601 /* Updating the RCU protected object in place is necessary since
1602 this function gets called from atomic context.
1603 It is valid since all other updates also lead to an completely
1604 empty fifo */
1605 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001606 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner813472c2011-05-03 16:47:02 +02001607 plan->total = 0;
1608 fifo_set(plan, 0);
1609 rcu_read_unlock();
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001610}
1611
Philipp Reisner1f04af32011-02-07 11:33:59 +01001612void start_resync_timer_fn(unsigned long data)
1613{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001614 struct drbd_device *device = (struct drbd_device *) data;
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001615 drbd_device_post_work(device, RS_START);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001616}
1617
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001618static void do_start_resync(struct drbd_device *device)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001619{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001620 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001621 drbd_warn(device, "postponing start_resync ...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001622 device->start_resync_timer.expires = jiffies + HZ/10;
1623 add_timer(&device->start_resync_timer);
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001624 return;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001625 }
1626
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001627 drbd_start_resync(device, C_SYNC_SOURCE);
1628 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001629}
1630
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001631static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1632{
1633 bool csums_after_crash_only;
1634 rcu_read_lock();
1635 csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1636 rcu_read_unlock();
1637 return connection->agreed_pro_version >= 89 && /* supported? */
1638 connection->csums_tfm && /* configured? */
1639 (csums_after_crash_only == 0 /* use for each resync? */
1640 || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */
1641}
1642
Philipp Reisnerb411b362009-09-25 16:07:19 -07001643/**
1644 * drbd_start_resync() - Start the resync process
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001645 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001646 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1647 *
1648 * This function might bring you directly into one of the
1649 * C_PAUSED_SYNC_* states.
1650 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001651void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001652{
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001653 struct drbd_peer_device *peer_device = first_peer_device(device);
1654 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001655 union drbd_state ns;
1656 int r;
1657
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001658 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001659 drbd_err(device, "Resync already running!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001660 return;
1661 }
1662
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001663 if (!test_bit(B_RS_H_DONE, &device->flags)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001664 if (side == C_SYNC_TARGET) {
1665 /* Since application IO was locked out during C_WF_BITMAP_T and
1666 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1667 we check that we might make the data inconsistent. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001668 r = drbd_khelper(device, "before-resync-target");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001669 r = (r >> 8) & 0xff;
1670 if (r > 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001671 drbd_info(device, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001672 "dropping connection.\n", r);
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001673 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001674 return;
1675 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001676 } else /* C_SYNC_SOURCE */ {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001677 r = drbd_khelper(device, "before-resync-source");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001678 r = (r >> 8) & 0xff;
1679 if (r > 0) {
1680 if (r == 3) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001681 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001682 "ignoring. Old userland tools?", r);
1683 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001684 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001685 "dropping connection.\n", r);
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001686 conn_request_state(connection,
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001687 NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001688 return;
1689 }
1690 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001691 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001692 }
1693
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001694 if (current == connection->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001695 /* The worker should not sleep waiting for state_mutex,
Philipp Reisnere64a3292011-02-05 17:34:11 +01001696 that can take long */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001697 if (!mutex_trylock(device->state_mutex)) {
1698 set_bit(B_RS_H_DONE, &device->flags);
1699 device->start_resync_timer.expires = jiffies + HZ/5;
1700 add_timer(&device->start_resync_timer);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001701 return;
1702 }
1703 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001704 mutex_lock(device->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001705 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001706 clear_bit(B_RS_H_DONE, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001707
Lars Ellenberg074f4af2014-04-28 18:43:26 +02001708 /* req_lock: serialize with drbd_send_and_submit() and others
1709 * global_state_lock: for stable sync-after dependencies */
1710 spin_lock_irq(&device->resource->req_lock);
1711 write_lock(&global_state_lock);
Philipp Reisnera7004712013-03-27 14:08:35 +01001712 /* Did some connection breakage or IO error race with us? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001713 if (device->state.conn < C_CONNECTED
1714 || !get_ldev_if_state(device, D_NEGOTIATING)) {
Lars Ellenberg074f4af2014-04-28 18:43:26 +02001715 write_unlock(&global_state_lock);
1716 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001717 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001718 return;
1719 }
1720
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001721 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001722
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001723 ns.aftr_isp = !_drbd_may_sync_now(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001724
1725 ns.conn = side;
1726
1727 if (side == C_SYNC_TARGET)
1728 ns.disk = D_INCONSISTENT;
1729 else /* side == C_SYNC_SOURCE */
1730 ns.pdsk = D_INCONSISTENT;
1731
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001732 r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1733 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001734
1735 if (ns.conn < C_CONNECTED)
1736 r = SS_UNKNOWN_ERROR;
1737
1738 if (r == SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001739 unsigned long tw = drbd_bm_total_weight(device);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001740 unsigned long now = jiffies;
1741 int i;
1742
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001743 device->rs_failed = 0;
1744 device->rs_paused = 0;
1745 device->rs_same_csum = 0;
1746 device->rs_last_events = 0;
1747 device->rs_last_sect_ev = 0;
1748 device->rs_total = tw;
1749 device->rs_start = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001750 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001751 device->rs_mark_left[i] = tw;
1752 device->rs_mark_time[i] = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001753 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001754 _drbd_pause_after(device);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001755 /* Forget potentially stale cached per resync extent bit-counts.
1756 * Open coded drbd_rs_cancel_all(device), we already have IRQs
1757 * disabled, and know the disk state is ok. */
1758 spin_lock(&device->al_lock);
1759 lc_reset(device->resync);
1760 device->resync_locked = 0;
1761 device->resync_wenr = LC_FREE;
1762 spin_unlock(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001763 }
Lars Ellenberg074f4af2014-04-28 18:43:26 +02001764 write_unlock(&global_state_lock);
1765 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001766
Philipp Reisnerb411b362009-09-25 16:07:19 -07001767 if (r == SS_SUCCESS) {
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001768 wake_up(&device->al_wait); /* for lc_reset() above */
Philipp Reisner328e0f12012-10-19 14:37:47 +02001769 /* reset rs_last_bcast when a resync or verify is started,
1770 * to deal with potential jiffies wrap. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001771 device->rs_last_bcast = jiffies - HZ;
Philipp Reisner328e0f12012-10-19 14:37:47 +02001772
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001773 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001774 drbd_conn_str(ns.conn),
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001775 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1776 (unsigned long) device->rs_total);
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001777 if (side == C_SYNC_TARGET) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001778 device->bm_resync_fo = 0;
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001779 device->use_csums = use_checksum_based_resync(connection, device);
1780 } else {
1781 device->use_csums = 0;
1782 }
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001783
1784 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1785 * with w_send_oos, or the sync target will get confused as to
1786 * how much bits to resync. We cannot do that always, because for an
1787 * empty resync and protocol < 95, we need to do it here, as we call
1788 * drbd_resync_finished from here in that case.
1789 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1790 * and from after_state_ch otherwise. */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001791 if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
1792 drbd_gen_and_send_sync_uuid(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001794 if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001795 /* This still has a race (about when exactly the peers
1796 * detect connection loss) that can lead to a full sync
1797 * on next handshake. In 8.3.9 we fixed this with explicit
1798 * resync-finished notifications, but the fix
1799 * introduces a protocol change. Sleeping for some
1800 * time longer than the ping interval + timeout on the
1801 * SyncSource, to give the SyncTarget the chance to
1802 * detect connection loss, then waiting for a ping
1803 * response (implicit in drbd_resync_finished) reduces
1804 * the race considerably, but does not solve it. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001805 if (side == C_SYNC_SOURCE) {
1806 struct net_conf *nc;
1807 int timeo;
1808
1809 rcu_read_lock();
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001810 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001811 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1812 rcu_read_unlock();
1813 schedule_timeout_interruptible(timeo);
1814 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001815 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001816 }
1817
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001818 drbd_rs_controller_reset(device);
1819 /* ns.conn may already be != device->state.conn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820 * we may have been paused in between, or become paused until
1821 * the timer triggers.
1822 * No matter, that is handled in resync_timer_fn() */
1823 if (ns.conn == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001824 mod_timer(&device->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001826 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001827 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001828 put_ldev(device);
1829 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001830}
1831
Lars Ellenberge334f552014-02-11 09:30:49 +01001832static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001833{
1834 struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1835 device->rs_last_bcast = jiffies;
1836
1837 if (!get_ldev(device))
1838 return;
1839
1840 drbd_bm_write_lazy(device, 0);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001841 if (resync_done && is_sync_state(device->state.conn))
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001842 drbd_resync_finished(device);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001843
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001844 drbd_bcast_event(device, &sib);
1845 /* update timestamp, in case it took a while to write out stuff */
1846 device->rs_last_bcast = jiffies;
1847 put_ldev(device);
1848}
1849
Lars Ellenberge334f552014-02-11 09:30:49 +01001850static void drbd_ldev_destroy(struct drbd_device *device)
1851{
1852 lc_destroy(device->resync);
1853 device->resync = NULL;
1854 lc_destroy(device->act_log);
1855 device->act_log = NULL;
1856 __no_warn(local,
1857 drbd_free_ldev(device->ldev);
1858 device->ldev = NULL;);
1859 clear_bit(GOING_DISKLESS, &device->flags);
1860 wake_up(&device->misc_wait);
1861}
1862
1863static void go_diskless(struct drbd_device *device)
1864{
1865 D_ASSERT(device, device->state.disk == D_FAILED);
1866 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1867 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1868 * the protected members anymore, though, so once put_ldev reaches zero
1869 * again, it will be safe to free them. */
1870
1871 /* Try to write changed bitmap pages, read errors may have just
1872 * set some bits outside the area covered by the activity log.
1873 *
1874 * If we have an IO error during the bitmap writeout,
1875 * we will want a full sync next time, just in case.
1876 * (Do we want a specific meta data flag for this?)
1877 *
1878 * If that does not make it to stable storage either,
1879 * we cannot do anything about that anymore.
1880 *
1881 * We still need to check if both bitmap and ldev are present, we may
1882 * end up here after a failed attach, before ldev was even assigned.
1883 */
1884 if (device->bitmap && device->ldev) {
1885 /* An interrupted resync or similar is allowed to recounts bits
1886 * while we detach.
1887 * Any modifications would not be expected anymore, though.
1888 */
1889 if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1890 "detach", BM_LOCKED_TEST_ALLOWED)) {
1891 if (test_bit(WAS_READ_ERROR, &device->flags)) {
1892 drbd_md_set_flag(device, MDF_FULL_SYNC);
1893 drbd_md_sync(device);
1894 }
1895 }
1896 }
1897
1898 drbd_force_state(device, NS(disk, D_DISKLESS));
1899}
1900
/* Handler for MD_SYNC device work: warn that the md_sync_timer expired and
 * sync the meta data from worker context.  Always returns 0. */
static int do_md_sync(struct drbd_device *device)
{
	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
	drbd_md_sync(device);

	return 0;
}
1907
Lars Ellenberg944410e2014-05-06 15:02:05 +02001908/* only called from drbd_worker thread, no locking */
1909void __update_timing_details(
1910 struct drbd_thread_timing_details *tdp,
1911 unsigned int *cb_nr,
1912 void *cb,
1913 const char *fn, const unsigned int line)
1914{
1915 unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
1916 struct drbd_thread_timing_details *td = tdp + i;
1917
1918 td->start_jif = jiffies;
1919 td->cb_addr = cb;
1920 td->caller_fn = fn;
1921 td->line = line;
1922 td->cb_nr = *cb_nr;
1923
1924 i = (i+1) % DRBD_THREAD_DETAILS_HIST;
1925 td = tdp + i;
1926 memset(td, 0, sizeof(*td));
1927
1928 ++(*cb_nr);
1929}
1930
Lars Ellenberge334f552014-02-11 09:30:49 +01001931#define WORK_PENDING(work_bit, todo) (todo & (1UL << work_bit))
1932static void do_device_work(struct drbd_device *device, const unsigned long todo)
1933{
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001934 if (WORK_PENDING(MD_SYNC, todo))
1935 do_md_sync(device);
Lars Ellenberge334f552014-02-11 09:30:49 +01001936 if (WORK_PENDING(RS_DONE, todo) ||
1937 WORK_PENDING(RS_PROGRESS, todo))
1938 update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo));
1939 if (WORK_PENDING(GO_DISKLESS, todo))
1940 go_diskless(device);
1941 if (WORK_PENDING(DESTROY_DISK, todo))
1942 drbd_ldev_destroy(device);
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001943 if (WORK_PENDING(RS_START, todo))
1944 do_start_resync(device);
Lars Ellenberge334f552014-02-11 09:30:49 +01001945}
1946
/* Bit mask of all device flag bits that denote unqueued device work. */
#define DRBD_DEVICE_WORK_MASK	\
	((1UL << MD_SYNC)	\
	|(1UL << RS_DONE)	\
	|(1UL << RS_PROGRESS)	\
	|(1UL << GO_DISKLESS)	\
	|(1UL << DESTROY_DISK)	\
	|(1UL << RS_START)	\
	)
1955
1956static unsigned long get_work_bits(unsigned long *flags)
1957{
1958 unsigned long old, new;
1959 do {
1960 old = *flags;
1961 new = old & ~DRBD_DEVICE_WORK_MASK;
1962 } while (cmpxchg(flags, old, new) != old);
1963 return old & DRBD_DEVICE_WORK_MASK;
1964}
1965
1966static void do_unqueued_work(struct drbd_connection *connection)
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001967{
1968 struct drbd_peer_device *peer_device;
1969 int vnr;
1970
1971 rcu_read_lock();
1972 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1973 struct drbd_device *device = peer_device->device;
Lars Ellenberge334f552014-02-11 09:30:49 +01001974 unsigned long todo = get_work_bits(&device->flags);
1975 if (!todo)
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001976 continue;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001977
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001978 kref_get(&device->kref);
1979 rcu_read_unlock();
Lars Ellenberge334f552014-02-11 09:30:49 +01001980 do_device_work(device, todo);
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001981 kref_put(&device->kref, drbd_destroy_device);
1982 rcu_read_lock();
1983 }
1984 rcu_read_unlock();
1985}
1986
Rashika Kheriaa186e472013-12-19 15:06:10 +05301987static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001988{
1989 spin_lock_irq(&queue->q_lock);
Lars Ellenberg15e26f62014-04-28 11:43:21 +02001990 list_splice_tail_init(&queue->q, work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001991 spin_unlock_irq(&queue->q_lock);
1992 return !list_empty(work_list);
1993}
1994
Rashika Kheriaa186e472013-12-19 15:06:10 +05301995static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001996{
1997 spin_lock_irq(&queue->q_lock);
1998 if (!list_empty(&queue->q))
1999 list_move(queue->q.next, work_list);
2000 spin_unlock_irq(&queue->q_lock);
2001 return !list_empty(work_list);
2002}
2003
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002004static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002005{
2006 DEFINE_WAIT(wait);
2007 struct net_conf *nc;
2008 int uncork, cork;
2009
2010 dequeue_work_item(&connection->sender_work, work_list);
2011 if (!list_empty(work_list))
2012 return;
2013
2014 /* Still nothing to do?
2015 * Maybe we still need to close the current epoch,
2016 * even if no new requests are queued yet.
2017 *
2018 * Also, poke TCP, just in case.
2019 * Then wait for new work (or signal). */
2020 rcu_read_lock();
2021 nc = rcu_dereference(connection->net_conf);
2022 uncork = nc ? nc->tcp_cork : 0;
2023 rcu_read_unlock();
2024 if (uncork) {
2025 mutex_lock(&connection->data.mutex);
2026 if (connection->data.socket)
2027 drbd_tcp_uncork(connection->data.socket);
2028 mutex_unlock(&connection->data.mutex);
2029 }
2030
2031 for (;;) {
2032 int send_barrier;
2033 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002034 spin_lock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002035 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
Lars Ellenbergbc317a92012-08-22 11:47:14 +02002036 /* dequeue single item only,
2037 * we still use drbd_queue_work_front() in some places */
2038 if (!list_empty(&connection->sender_work.q))
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01002039 list_splice_tail_init(&connection->sender_work.q, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002040 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
2041 if (!list_empty(work_list) || signal_pending(current)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002042 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002043 break;
2044 }
Lars Ellenbergf9c78122014-04-28 18:43:29 +02002045
2046 /* We found nothing new to do, no to-be-communicated request,
2047 * no other work item. We may still need to close the last
2048 * epoch. Next incoming request epoch will be connection ->
2049 * current transfer log epoch number. If that is different
2050 * from the epoch of the last request we communicated, it is
2051 * safe to send the epoch separating barrier now.
2052 */
2053 send_barrier =
2054 atomic_read(&connection->current_tle_nr) !=
2055 connection->send.current_epoch_nr;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002056 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergf9c78122014-04-28 18:43:29 +02002057
2058 if (send_barrier)
2059 maybe_send_barrier(connection,
2060 connection->send.current_epoch_nr + 1);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002061
Lars Ellenberge334f552014-02-11 09:30:49 +01002062 if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002063 break;
2064
Lars Ellenberga80ca1a2013-12-27 17:17:25 +01002065 /* drbd_send() may have called flush_signals() */
2066 if (get_t_state(&connection->worker) != RUNNING)
2067 break;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002068
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002069 schedule();
2070 /* may be woken up for other things but new work, too,
2071 * e.g. if the current epoch got closed.
2072 * In which case we send the barrier above. */
2073 }
2074 finish_wait(&connection->sender_work.q_wait, &wait);
2075
2076 /* someone may have changed the config while we have been waiting above. */
2077 rcu_read_lock();
2078 nc = rcu_dereference(connection->net_conf);
2079 cork = nc ? nc->tcp_cork : 0;
2080 rcu_read_unlock();
2081 mutex_lock(&connection->data.mutex);
2082 if (connection->data.socket) {
2083 if (cork)
2084 drbd_tcp_cork(connection->data.socket);
2085 else if (!uncork)
2086 drbd_tcp_uncork(connection->data.socket);
2087 }
2088 mutex_unlock(&connection->data.mutex);
2089}
2090
Philipp Reisnerb411b362009-09-25 16:07:19 -07002091int drbd_worker(struct drbd_thread *thi)
2092{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002093 struct drbd_connection *connection = thi->connection;
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002094 struct drbd_work *w = NULL;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02002095 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002096 LIST_HEAD(work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002097 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002098
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01002099 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01002100 drbd_thread_current_set_cpu(thi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002101
Lars Ellenberg944410e2014-05-06 15:02:05 +02002102 if (list_empty(&work_list)) {
2103 update_worker_timing_details(connection, wait_for_work);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002104 wait_for_work(connection, &work_list);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002105 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002106
Lars Ellenberg944410e2014-05-06 15:02:05 +02002107 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2108 update_worker_timing_details(connection, do_unqueued_work);
Lars Ellenberge334f552014-02-11 09:30:49 +01002109 do_unqueued_work(connection);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002110 }
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002111
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002112 if (signal_pending(current)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002113 flush_signals(current);
Philipp Reisner19393e12011-02-09 10:09:07 +01002114 if (get_t_state(thi) == RUNNING) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02002115 drbd_warn(connection, "Worker got an unexpected signal\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002116 continue;
Philipp Reisner19393e12011-02-09 10:09:07 +01002117 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002118 break;
2119 }
2120
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01002121 if (get_t_state(thi) != RUNNING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002123
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002124 while (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002125 w = list_first_entry(&work_list, struct drbd_work, list);
2126 list_del_init(&w->list);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002127 update_worker_timing_details(connection, w->cb);
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002128 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002129 continue;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002130 if (connection->cstate >= C_WF_REPORT_PARAMS)
2131 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002132 }
2133 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002134
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002135 do {
Lars Ellenberg944410e2014-05-06 15:02:05 +02002136 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2137 update_worker_timing_details(connection, do_unqueued_work);
Lars Ellenberge334f552014-02-11 09:30:49 +01002138 do_unqueued_work(connection);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002139 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002140 while (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002141 w = list_first_entry(&work_list, struct drbd_work, list);
2142 list_del_init(&w->list);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002143 update_worker_timing_details(connection, w->cb);
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002144 w->cb(w, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002145 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002146 dequeue_work_batch(&connection->sender_work, &work_list);
Lars Ellenberge334f552014-02-11 09:30:49 +01002147 } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002148
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002149 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02002150 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2151 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002152 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002153 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002154 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002155 drbd_device_cleanup(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02002156 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002157 rcu_read_lock();
Philipp Reisner0e29d162011-02-18 14:23:11 +01002158 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002159 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002160
2161 return 0;
2162}