/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_device *, int);
static int make_resync_request(struct drbd_device *, int);

/* endio handlers:
 *   drbd_md_endio (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   drbd_bm_endio (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_endio(struct bio *bio)
{
	struct drbd_device *device;

	device = bio->bi_private;
	device->md_io.error = bio->bi_error;

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	device->md_io.done = 1;
	wake_up(&device->misc_wait);
	bio_put(bio);
	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
		put_ldev(device);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;
	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	/* FIXME do we want to detach for failed REQ_DISCARD?
	 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
	if (peer_req->flags & EE_WAS_ERROR)
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	wake_asender(peer_device->connection);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->peer_device->device;
	int is_write = bio_data_dir(bio) == WRITE;
	int is_discard = !!(bio->bi_rw & REQ_DISCARD);

	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
		drbd_warn(device, "%s: error=%d s=%llus\n",
				is_write ? (is_discard ? "discard" : "write")
					: "read", bio->bi_error,
				(unsigned long long)peer_req->i.sector);

	if (bio->bi_error)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}
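/* Note: a single drbd_peer_request may have been split into several bios by
 * drbd_submit_peer_request(); pending_bios counts them, so the final
 * read/write completion above runs only once the last of those bios ends. */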

void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
	panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
		device->minor, device->resource->name, device->vnr);
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	enum drbd_req_event what;

	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which no longer
	 * complete requests at all, not even with error completions.  In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!bio->bi_error)
			drbd_panic_after_delayed_completion_of_aborted_request(device);
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(bio->bi_error)) {
		if (bio->bi_rw & REQ_DISCARD)
			what = (bio->bi_error == -EOPNOTSUPP)
				? DISCARD_COMPLETED_NOTSUPP
				: DISCARD_COMPLETED_WITH_ERROR;
		else
			what = (bio_data_dir(bio) == WRITE)
			? WRITE_COMPLETED_WITH_ERROR
			: (bio_rw(bio) == READ)
			  ? READ_COMPLETED_WITH_ERROR
			  : READ_AHEAD_COMPLETED_WITH_ERROR;
	} else
		what = COMPLETED_OK;

	bio_put(req->private_bio);
	req->private_bio = ERR_PTR(bio->bi_error);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->resource->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		crypto_hash_update(&desc, &sg, sg.length);
		page = tmp;
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	crypto_hash_update(&desc, &sg, sg.length);
	crypto_hash_final(&desc, digest);
}
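/* Note on the last page above: peer_req->i.size is in bytes, so
 * "size & (PAGE_SIZE - 1)" is the portion used in the final page; the
 * "len ?: PAGE_SIZE" fallback covers sizes that are an exact multiple of
 * PAGE_SIZE, where the last page is fully used as well. */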

void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct bio_vec bvec;
	struct bvec_iter iter;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	bio_for_each_segment(bvec, bio, iter) {
		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		crypto_hash_update(&desc, &sg, sg.length);
	}
	crypto_hash_final(&desc, digest);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(device);
		err = drbd_send_drequest_csum(peer_device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		drbd_err(device, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
				       size, true /* has real payload */, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help.  If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, resync_work);

	switch (device->state.conn) {
	case C_VERIFY_S:
		make_ov_request(device, cancel);
		break;
	case C_SYNC_TARGET:
		make_resync_request(device, cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(unsigned long data)
{
	struct drbd_device *device = (struct drbd_device *) data;

	drbd_queue_work_if_unqueued(
		&first_peer_device(device)->connection->sender_work,
		&device->resync_work);
}
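/* Note: the resync timer does no I/O itself; it only (re)queues resync_work
 * on the connection's sender_work list.  The actual pacing happens in
 * make_resync_request()/make_ov_request(), which re-arm the timer with
 * SLEEP_TIME (HZ/10) between batches. */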

static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}
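/* The plan fifo is a plain ring buffer: fifo_push() stores "value" at the
 * current head and returns the entry it overwrites, i.e. whatever was pushed
 * fb->size invocations earlier.  drbd_rs_controller() below relies on this:
 * fifo_add_val() spreads a correction over all slots, and fifo_push(plan, 0)
 * pops the share of that correction that is due in the current step. */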

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
	struct disk_conf *dc;
	unsigned int want;	/* The number of sectors we want in-flight */
	int req_sect;		/* Number of sectors to request in this turn */
	int correction;		/* Number of sectors more we need in-flight */
	int cps;		/* correction per invocation of drbd_rs_controller() */
	int steps;		/* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}
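/* Illustration with made-up numbers: assume c_fill_target = 1000 sectors,
 * steps = 10, rs_in_flight = 600 and plan->total = 200.  Then
 * correction = 1000 - 600 - 200 = 200 sectors, spread as cps = 20 sectors
 * onto each of the 10 plan slots; fifo_push() then yields the correction due
 * right now, and we request roughly sect_in plus that correction, clamped to
 * zero and to the c_max_rate based maximum. */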

static int drbd_rs_number_requests(struct drbd_device *device)
{
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	int number, mxb;

	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */

	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
	 * "number of pages" (typically also 4k),
	 * but "rs_in_flight" is in "sectors" (512 Byte). */
	if (mxb - device->rs_in_flight/8 < number)
		number = mxb - device->rs_in_flight/8;

	return number;
}
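/* For reference: BM_BLOCK_SHIFT is 12, so ">> (BM_BLOCK_SHIFT - 9)" above is
 * ">> 3", converting the controller's sectors into 4 KiB bitmap blocks;
 * "rs_in_flight/8" is the same sectors-to-4-KiB conversion, so the value can
 * be compared against mxb, which counts pages. */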

static int make_resync_request(struct drbd_device *const device, int cancel)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, requeue = 0;
	int i = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled,
		 * but notify TCP that we'd like to have more space. */
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket) {
			struct sock *sk = connection->data.socket->sk;
			int queued = sk->sk_wmem_queued;
			int sndbuf = sk->sk_sndbuf;
			if (queued > sndbuf / 2) {
				requeue = 1;
				if (sk->sk_socket)
					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
		} else
			requeue = 1;
		mutex_unlock(&connection->data.mutex);
		if (requeue)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
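		/* For reference: "sector & ((1<<(align+3))-1)" in the loop below
		 * checks that the request start is aligned to 2^(align+3) sectors,
		 * i.e. to the next larger request size of BM_BLOCK_SIZE << align
		 * bytes; e.g. with align == 1 we only grow past 4 KiB if the start
		 * sector is 8 KiB aligned. */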
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		if (device->use_csums) {
			switch (read_for_csum(peer_device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

static int make_ov_request(struct drbd_device *device, int cancel)
{
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_try_rs_begin_io(device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(device);
	drbd_resync_finished(device);

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(device);

	return 0;
}

static void ping_peer(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;

	clear_bit(GOT_PING_ACK, &connection->flags);
	request_ping(connection);
	wait_event(connection->ping_wait,
		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}
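/* Note: drbd_resync_finished() below calls ping_peer() before it commits the
 * final state change; presumably this makes sure the peer is still reachable
 * and has caught up with everything we sent before we declare the resync or
 * online verify finished. */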

int drbd_resync_finished(struct drbd_device *device)
{
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_device_work *dw;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(device)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now). Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
		if (dw) {
			dw->w.cb = w_resync_finished;
			dw->device = device;
			drbd_queue_work(&first_peer_device(device)->connection->sender_work,
					&dw->w);
			return 1;
		}
		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
	}

	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = device->rs_total;
	/* adjust for verify start and stop sectors, respective reached position */
	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
		db -= device->ov_left;

	dbdt = Bit2KB(db/dt);
	device->rs_paused /= HZ;

	if (!get_ldev(device))
		goto out;

	ping_peer(device);

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify" : "Resync",
	     dt + device->rs_paused, device->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(device);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT(device, (n_oos - device->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (device->use_csums && device->rs_total) {
			const unsigned long s = device->rs_same_csum;
			const unsigned long t = device->rs_total;
			const int ratio =
				(t == 0)     ? 0 :
			(t < 100000) ? ((s*100)/t) : (s/(t/100));
			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
			     "transferred %luK total %luK\n",
			     ratio,
			     Bit2KB(device->rs_same_csum),
			     Bit2KB(device->rs_total - device->rs_same_csum),
			     Bit2KB(device->rs_total));
		}
909 }
910
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200911 if (device->rs_failed) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200912 drbd_info(device, " %lu failed blocks\n", device->rs_failed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700913
914 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
915 ns.disk = D_INCONSISTENT;
916 ns.pdsk = D_UP_TO_DATE;
917 } else {
918 ns.disk = D_UP_TO_DATE;
919 ns.pdsk = D_INCONSISTENT;
920 }
921 } else {
922 ns.disk = D_UP_TO_DATE;
923 ns.pdsk = D_UP_TO_DATE;
924
925 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200926 if (device->p_uuid) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700927 int i;
928 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200929 _drbd_uuid_set(device, i, device->p_uuid[i]);
930 drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
931 _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700932 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200933 drbd_err(device, "device->p_uuid is NULL! BUG\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700934 }
935 }
936
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100937 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
938 /* for verify runs, we don't update uuids here,
939 * so there would be nothing to report. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200940 drbd_uuid_set_bm(device, 0UL);
941 drbd_print_uuids(device, "updated UUIDs");
942 if (device->p_uuid) {
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100943 /* Now the two UUID sets are equal, update what we
944 * know of the peer. */
945 int i;
946 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200947 device->p_uuid[i] = device->ldev->md.uuid[i];
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100948 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700949 }
950 }
951
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200952 _drbd_set_state(device, ns, CS_VERBOSE, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700953out_unlock:
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200954 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200955 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700956out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200957 device->rs_total = 0;
958 device->rs_failed = 0;
959 device->rs_paused = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200960
961 /* reset start sector, if we reached end of device */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200962 if (verify_done && device->ov_left == 0)
963 device->ov_start_sector = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700964
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200965 drbd_md_sync(device);
Lars Ellenberg13d42682010-10-13 17:37:54 +0200966
Philipp Reisnerb411b362009-09-25 16:07:19 -0700967 if (khelper_cmd)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200968 drbd_khelper(device, khelper_cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700969
970 return 1;
971}

/* helper */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	if (drbd_peer_req_has_active_page(peer_req)) {
		/* This might happen if sendpage() has not finished */
		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
		atomic_add(i, &device->pp_in_use_by_net);
		atomic_sub(i, &device->pp_in_use);
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->net_ee);
		spin_unlock_irq(&device->resource->req_lock);
		wake_up(&drbd_pp_wait);
	} else
		drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}
1026
1027/**
Andreas Gruenbachera209b4a2011-08-17 12:43:25 +02001028 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
Philipp Reisnerb411b362009-09-25 16:07:19 -07001029 * @w: work object.
1030 * @cancel: The connection will be closed anyway
1031 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001032int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001033{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001034 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001035 struct drbd_peer_device *peer_device = peer_req->peer_device;
1036 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001037 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001038
1039 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001040 drbd_free_peer_req(device, peer_req);
1041 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001042 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001043 }
1044
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001045 if (get_ldev_if_state(device, D_FAILED)) {
1046 drbd_rs_complete_io(device, peer_req->i.sector);
1047 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001048 }
1049
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001050 if (device->state.conn == C_AHEAD) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001051 err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001052 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001053 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1054 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001055 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001056 } else {
1057 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001058 drbd_err(device, "Not sending RSDataReply, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001059 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001060 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001061 }
1062 } else {
1063 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001064 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001065 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001066
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001067 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001068
1069 /* update resync data with failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001070 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001071 }
1072
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001073 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001074
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001075 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001076
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001077 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001078 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001079 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001080}
1081
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001082int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001083{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001084 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001085 struct drbd_peer_device *peer_device = peer_req->peer_device;
1086 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001087 struct digest_info *di;
1088 int digest_size;
1089 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001090 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091
1092 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001093 drbd_free_peer_req(device, peer_req);
1094 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001095 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001096 }
1097
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001098 if (get_ldev(device)) {
1099 drbd_rs_complete_io(device, peer_req->i.sector);
1100 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001101 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001102
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001103 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001104
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001105 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001106 /* quick hack to try to avoid a race against reconfiguration.
1107 * a real fix would be much more involved,
1108 * introducing more locking mechanisms */
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001109 if (peer_device->connection->csums_tfm) {
1110 digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001111 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001112 digest = kmalloc(digest_size, GFP_NOIO);
1113 }
1114 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001115 drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001116 eq = !memcmp(digest, di->digest, digest_size);
1117 kfree(digest);
1118 }
1119
1120 if (eq) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001121 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001122 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001123 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001124 err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001125 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001126 inc_rs_pending(device);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001127 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1128 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001129 kfree(di);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001130 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001131 }
1132 } else {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001133 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001134 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001135 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001136 }
1137
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001138 dec_unacked(device);
1139 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001140
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001141 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001142 drbd_err(device, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001143 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144}
1145
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001146int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001147{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001148 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001149 struct drbd_peer_device *peer_device = peer_req->peer_device;
1150 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001151 sector_t sector = peer_req->i.sector;
1152 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001153 int digest_size;
1154 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001155 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001156
1157 if (unlikely(cancel))
1158 goto out;
1159
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001160 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001161 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001162 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001163 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001164 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001165 }
1166
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001167 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001168 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001169 else
1170 memset(digest, 0, digest_size);
1171
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001172	/* Free peer_req and pages before send.
1173 * In case we block on congestion, we could otherwise run into
1174 * some distributed deadlock, if the other side blocks on
1175 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001176 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001177 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001178 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001179 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001180 err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001181 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001182 dec_rs_pending(device);
Philipp Reisner8f214202011-03-01 15:52:35 +01001183 kfree(digest);
1184
Philipp Reisnerb411b362009-09-25 16:07:19 -07001185out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001186 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001187 drbd_free_peer_req(device, peer_req);
1188 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001189 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001190}
1191
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001192void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001193{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001194 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1195 device->ov_last_oos_size += size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001197 device->ov_last_oos_start = sector;
1198 device->ov_last_oos_size = size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001200 drbd_set_out_of_sync(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201}
1202
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001203int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001204{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001205 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001206 struct drbd_peer_device *peer_device = peer_req->peer_device;
1207 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001209 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001210 sector_t sector = peer_req->i.sector;
1211 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001212 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001213 int err, eq = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001214 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001215
1216 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001217 drbd_free_peer_req(device, peer_req);
1218 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001219 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001220 }
1221
1222 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1223 * the resync lru has been cleaned up already */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001224 if (get_ldev(device)) {
1225 drbd_rs_complete_io(device, peer_req->i.sector);
1226 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001227 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001229 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001231 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001232 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001233 digest = kmalloc(digest_size, GFP_NOIO);
1234 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001235 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001237 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238 eq = !memcmp(digest, di->digest, digest_size);
1239 kfree(digest);
1240 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001241 }
1242
Lars Ellenberg9676c762011-02-22 14:02:31 +01001243 /* Free peer_req and pages before send.
1244 * In case we block on congestion, we could otherwise run into
1245 * some distributed deadlock, if the other side blocks on
1246 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001247 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001248 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001249 if (!eq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001250 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001251 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001252 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001253
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001254 err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001255 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001256
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001257 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001258
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001259 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001260
1261 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001262 if ((device->ov_left & 0x200) == 0x200)
1263 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001264
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001265 stop_sector_reached = verify_can_do_stop_sector(device) &&
1266 (sector + (size>>9)) >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001267
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001268 if (device->ov_left == 0 || stop_sector_reached) {
1269 ov_out_of_sync_print(device);
1270 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271 }
1272
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001273 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001274}
1275
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001276/* FIXME
1277 * We need to track the number of pending barrier acks,
1278 * and to be able to wait for them.
1279 * See also comment in drbd_adm_attach before drbd_suspend_io.
1280 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001281static int drbd_send_barrier(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001282{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001283 struct p_barrier *p;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001284 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001285
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001286 sock = &connection->data;
1287 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001288 if (!p)
1289 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001290 p->barrier = connection->send.current_epoch_nr;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001291 p->pad = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001292 connection->send.current_epoch_writes = 0;
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001293 connection->send.last_sent_barrier_jif = jiffies;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001294
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001295 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001296}
1297
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001298int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001299{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001300 struct drbd_device *device =
1301 container_of(w, struct drbd_device, unplug_work);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001302 struct drbd_socket *sock;
1303
Philipp Reisnerb411b362009-09-25 16:07:19 -07001304 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001305 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001306 sock = &first_peer_device(device)->connection->data;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001307 if (!drbd_prepare_command(first_peer_device(device), sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001308 return -EIO;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001309 return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001310}
1311
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001312static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001313{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001314 if (!connection->send.seen_any_write_yet) {
1315 connection->send.seen_any_write_yet = true;
1316 connection->send.current_epoch_nr = epoch;
1317 connection->send.current_epoch_writes = 0;
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001318 connection->send.last_sent_barrier_jif = jiffies;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001319 }
1320}
1321
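/* Close the previous write epoch: if this request belongs to a newer epoch
 * than the one we last sent for, and that previous epoch actually saw writes,
 * send the P_BARRIER that ends it. No-op before the first write. */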
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001322static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001323{
1324 /* re-init if first write on this connection */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001325 if (!connection->send.seen_any_write_yet)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001326 return;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001327 if (connection->send.current_epoch_nr != epoch) {
1328 if (connection->send.current_epoch_writes)
1329 drbd_send_barrier(connection);
1330 connection->send.current_epoch_nr = epoch;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001331 }
1332}
1333
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001334int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001335{
1336 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001337 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001338 struct drbd_peer_device *const peer_device = first_peer_device(device);
1339 struct drbd_connection *const connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001340 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001341
1342 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001343 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001344 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001345 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001346 req->pre_send_jif = jiffies;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001347
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001348 /* this time, no connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001349 * If it was sent, it was the closing barrier for the last
1350 * replicated epoch, before we went into AHEAD mode.
1351 * No more barriers will be sent, until we leave AHEAD mode again. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001352 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001353
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001354 err = drbd_send_out_of_sync(peer_device, req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001355 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001356
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001357 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001358}
1359
Philipp Reisnerb411b362009-09-25 16:07:19 -07001360/**
1361 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001362 * @w: work object.
1363 * @cancel: The connection will be closed anyway
1364 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001365int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001366{
1367 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001368 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001369 struct drbd_peer_device *const peer_device = first_peer_device(device);
1370 struct drbd_connection *connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001371 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001372
1373 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001374 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001375 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001376 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001377 req->pre_send_jif = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001378
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001379 re_init_if_first_write(connection, req->epoch);
1380 maybe_send_barrier(connection, req->epoch);
1381 connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001382
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001383 err = drbd_send_dblock(peer_device, req);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001384 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001386 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387}
1388
1389/**
1390 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
Philipp Reisnerb411b362009-09-25 16:07:19 -07001391 * @w: work object.
1392 * @cancel: The connection will be closed anyways
1393 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001394int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001395{
1396 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001397 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001398 struct drbd_peer_device *const peer_device = first_peer_device(device);
1399 struct drbd_connection *connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001400 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001401
1402 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001403 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001404 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001405 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001406 req->pre_send_jif = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001407
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001408 /* Even read requests may close a write epoch,
1409	 * if one was opened already. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001410 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001411
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001412 err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001413 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001414
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001415 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001416
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001417 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001418}
1419
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001420int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001421{
1422 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001423 struct drbd_device *device = req->device;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001424
Philipp Reisner07782862010-08-31 12:00:50 +02001425 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01001426 drbd_al_begin_io(device, &req->i);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001427
1428 drbd_req_make_private_bio(req, req->master_bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001429 req->private_bio->bi_bdev = device->ldev->backing_bdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001430 generic_make_request(req->private_bio);
1431
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001432 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001433}
1434
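/* Follow the resync-after dependency chain (disk_conf->resync_after) starting
 * at @device. Returns 0 if some device we are configured to resync after is
 * itself resyncing or has resync paused, 1 if this device may sync now. */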
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001435static int _drbd_may_sync_now(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001436{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001437 struct drbd_device *odev = device;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001438 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001439
1440 while (1) {
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001441 if (!odev->ldev || odev->state.disk == D_DISKLESS)
Philipp Reisner438c8372011-03-28 14:48:01 +02001442 return 1;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001443 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001444 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001445 rcu_read_unlock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001446 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001447 return 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001448 odev = minor_to_device(resync_after);
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001449 if (!odev)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001450 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001451 if ((odev->state.conn >= C_SYNC_SOURCE &&
1452 odev->state.conn <= C_PAUSED_SYNC_T) ||
1453 odev->state.aftr_isp || odev->state.peer_isp ||
1454 odev->state.user_isp)
1455 return 0;
1456 }
1457}
1458
1459/**
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001460 * drbd_pause_after() - Pause resync on all devices that may not resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001461 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001462 *
1463 * Called from process context only (admin command and after_state_ch).
1464 */
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001465static bool drbd_pause_after(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001467 bool changed = false;
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001468 struct drbd_device *odev;
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001469 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001470
Philipp Reisner695d08f2011-04-11 22:53:32 -07001471 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001472 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1474 continue;
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001475 if (!_drbd_may_sync_now(odev) &&
1476 _drbd_set_state(_NS(odev, aftr_isp, 1),
1477 CS_HARD, NULL) != SS_NOTHING_TO_DO)
1478 changed = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001480 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001481
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001482 return changed;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483}
1484
1485/**
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001486 * drbd_resume_next() - Resume resync on all devices that may resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001487 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001488 *
1489 * Called from process context only (admin command and worker).
1490 */
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001491static bool drbd_resume_next(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001492{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001493 bool changed = false;
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001494 struct drbd_device *odev;
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001495 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001496
Philipp Reisner695d08f2011-04-11 22:53:32 -07001497 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001498 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1500 continue;
1501 if (odev->state.aftr_isp) {
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001502 if (_drbd_may_sync_now(odev) &&
1503 _drbd_set_state(_NS(odev, aftr_isp, 0),
1504 CS_HARD, NULL) != SS_NOTHING_TO_DO)
1505 changed = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001506 }
1507 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001508 rcu_read_unlock();
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001509 return changed;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001510}
1511
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001512void resume_next_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001514 lock_all_resources();
1515 drbd_resume_next(device);
1516 unlock_all_resources();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001517}
1518
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001519void suspend_other_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001520{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001521 lock_all_resources();
1522 drbd_pause_after(device);
1523 unlock_all_resources();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001524}
1525
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001526/* caller must lock_all_resources() */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001527enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001528{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001529 struct drbd_device *odev;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001530 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001531
1532 if (o_minor == -1)
1533 return NO_ERROR;
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001534 if (o_minor < -1 || o_minor > MINORMASK)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001535 return ERR_RESYNC_AFTER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536
1537 /* check for loops */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001538 odev = minor_to_device(o_minor);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001539 while (1) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001540 if (odev == device)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001541 return ERR_RESYNC_AFTER_CYCLE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001542
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001543 /* You are free to depend on diskless, non-existing,
1544 * or not yet/no longer existing minors.
1545 * We only reject dependency loops.
1546 * We cannot follow the dependency chain beyond a detached or
1547 * missing minor.
1548 */
1549 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1550 return NO_ERROR;
1551
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001552 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001553 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001554 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555 /* dependency chain ends here, no cycles. */
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001556 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557 return NO_ERROR;
1558
1559 /* follow the dependency chain */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001560 odev = minor_to_device(resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001561 }
1562}
1563
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001564/* caller must lock_all_resources() */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001565void drbd_resync_after_changed(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001567 int changed;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001569 do {
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001570 changed = drbd_pause_after(device);
1571 changed |= drbd_resume_next(device);
1572 } while (changed);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001573}
1574
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001575void drbd_rs_controller_reset(struct drbd_device *device)
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001576{
Lars Ellenbergff8bd882014-11-10 17:21:12 +01001577 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Philipp Reisner813472c2011-05-03 16:47:02 +02001578 struct fifo_buffer *plan;
1579
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001580 atomic_set(&device->rs_sect_in, 0);
1581 atomic_set(&device->rs_sect_ev, 0);
1582 device->rs_in_flight = 0;
Lars Ellenbergff8bd882014-11-10 17:21:12 +01001583 device->rs_last_events =
1584 (int)part_stat_read(&disk->part0, sectors[0]) +
1585 (int)part_stat_read(&disk->part0, sectors[1]);
Philipp Reisner813472c2011-05-03 16:47:02 +02001586
1587 /* Updating the RCU protected object in place is necessary since
1588 this function gets called from atomic context.
1589	   It is valid since all other updates also lead to a completely
1590 empty fifo */
1591 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001592 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner813472c2011-05-03 16:47:02 +02001593 plan->total = 0;
1594 fifo_set(plan, 0);
1595 rcu_read_unlock();
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001596}
1597
Philipp Reisner1f04af32011-02-07 11:33:59 +01001598void start_resync_timer_fn(unsigned long data)
1599{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001600 struct drbd_device *device = (struct drbd_device *) data;
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001601 drbd_device_post_work(device, RS_START);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001602}
1603
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001604static void do_start_resync(struct drbd_device *device)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001605{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001606 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001607 drbd_warn(device, "postponing start_resync ...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001608 device->start_resync_timer.expires = jiffies + HZ/10;
1609 add_timer(&device->start_resync_timer);
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001610 return;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001611 }
1612
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001613 drbd_start_resync(device, C_SYNC_SOURCE);
1614 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001615}
1616
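/* Checksum based resync requires protocol 89 or later and a configured
 * csums algorithm; it is used for every resync, or only after a primary
 * crash if csums-after-crash-only is set. */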
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001617static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1618{
1619 bool csums_after_crash_only;
1620 rcu_read_lock();
1621 csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1622 rcu_read_unlock();
1623 return connection->agreed_pro_version >= 89 && /* supported? */
1624 connection->csums_tfm && /* configured? */
1625 (csums_after_crash_only == 0 /* use for each resync? */
1626 || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */
1627}
1628
Philipp Reisnerb411b362009-09-25 16:07:19 -07001629/**
1630 * drbd_start_resync() - Start the resync process
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001631 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001632 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1633 *
1634 * This function might bring you directly into one of the
1635 * C_PAUSED_SYNC_* states.
1636 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001637void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001638{
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001639 struct drbd_peer_device *peer_device = first_peer_device(device);
1640 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001641 union drbd_state ns;
1642 int r;
1643
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001644 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001645 drbd_err(device, "Resync already running!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001646 return;
1647 }
1648
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001649 if (!test_bit(B_RS_H_DONE, &device->flags)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001650 if (side == C_SYNC_TARGET) {
1651 /* Since application IO was locked out during C_WF_BITMAP_T and
1652 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1653			   we check whether we may make the data inconsistent. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001654 r = drbd_khelper(device, "before-resync-target");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001655 r = (r >> 8) & 0xff;
1656 if (r > 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001657 drbd_info(device, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001658 "dropping connection.\n", r);
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001659 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001660 return;
1661 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001662 } else /* C_SYNC_SOURCE */ {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001663 r = drbd_khelper(device, "before-resync-source");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001664 r = (r >> 8) & 0xff;
1665 if (r > 0) {
1666 if (r == 3) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001667 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001668 "ignoring. Old userland tools?", r);
1669 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001670 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001671 "dropping connection.\n", r);
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001672 conn_request_state(connection,
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001673 NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001674 return;
1675 }
1676 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001677 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001678 }
1679
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001680 if (current == connection->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001681 /* The worker should not sleep waiting for state_mutex,
Philipp Reisnere64a3292011-02-05 17:34:11 +01001682 that can take long */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001683 if (!mutex_trylock(device->state_mutex)) {
1684 set_bit(B_RS_H_DONE, &device->flags);
1685 device->start_resync_timer.expires = jiffies + HZ/5;
1686 add_timer(&device->start_resync_timer);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001687 return;
1688 }
1689 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001690 mutex_lock(device->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001691 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001692
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001693 lock_all_resources();
1694 clear_bit(B_RS_H_DONE, &device->flags);
Philipp Reisnera7004712013-03-27 14:08:35 +01001695 /* Did some connection breakage or IO error race with us? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001696 if (device->state.conn < C_CONNECTED
1697 || !get_ldev_if_state(device, D_NEGOTIATING)) {
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001698 unlock_all_resources();
1699 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001700 }
1701
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001702 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001703
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001704 ns.aftr_isp = !_drbd_may_sync_now(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001705
1706 ns.conn = side;
1707
1708 if (side == C_SYNC_TARGET)
1709 ns.disk = D_INCONSISTENT;
1710 else /* side == C_SYNC_SOURCE */
1711 ns.pdsk = D_INCONSISTENT;
1712
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001713 r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001714 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001715
1716 if (ns.conn < C_CONNECTED)
1717 r = SS_UNKNOWN_ERROR;
1718
1719 if (r == SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001720 unsigned long tw = drbd_bm_total_weight(device);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001721 unsigned long now = jiffies;
1722 int i;
1723
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001724 device->rs_failed = 0;
1725 device->rs_paused = 0;
1726 device->rs_same_csum = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001727 device->rs_last_sect_ev = 0;
1728 device->rs_total = tw;
1729 device->rs_start = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001730 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001731 device->rs_mark_left[i] = tw;
1732 device->rs_mark_time[i] = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001733 }
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001734 drbd_pause_after(device);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001735 /* Forget potentially stale cached per resync extent bit-counts.
1736 * Open coded drbd_rs_cancel_all(device), we already have IRQs
1737 * disabled, and know the disk state is ok. */
1738 spin_lock(&device->al_lock);
1739 lc_reset(device->resync);
1740 device->resync_locked = 0;
1741 device->resync_wenr = LC_FREE;
1742 spin_unlock(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001743 }
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001744 unlock_all_resources();
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001745
Philipp Reisnerb411b362009-09-25 16:07:19 -07001746 if (r == SS_SUCCESS) {
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001747 wake_up(&device->al_wait); /* for lc_reset() above */
Philipp Reisner328e0f12012-10-19 14:37:47 +02001748 /* reset rs_last_bcast when a resync or verify is started,
1749 * to deal with potential jiffies wrap. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001750 device->rs_last_bcast = jiffies - HZ;
Philipp Reisner328e0f12012-10-19 14:37:47 +02001751
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001752 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001753 drbd_conn_str(ns.conn),
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001754 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1755 (unsigned long) device->rs_total);
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001756 if (side == C_SYNC_TARGET) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001757 device->bm_resync_fo = 0;
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001758 device->use_csums = use_checksum_based_resync(connection, device);
1759 } else {
1760 device->use_csums = 0;
1761 }
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001762
1763 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1764 * with w_send_oos, or the sync target will get confused as to
1765		 * how many bits to resync. We cannot do that always, because for an
1766 * empty resync and protocol < 95, we need to do it here, as we call
1767 * drbd_resync_finished from here in that case.
1768 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1769 * and from after_state_ch otherwise. */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001770 if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
1771 drbd_gen_and_send_sync_uuid(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001772
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001773 if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001774 /* This still has a race (about when exactly the peers
1775 * detect connection loss) that can lead to a full sync
1776 * on next handshake. In 8.3.9 we fixed this with explicit
1777 * resync-finished notifications, but the fix
1778 * introduces a protocol change. Sleeping for some
1779 * time longer than the ping interval + timeout on the
1780 * SyncSource, to give the SyncTarget the chance to
1781 * detect connection loss, then waiting for a ping
1782 * response (implicit in drbd_resync_finished) reduces
1783 * the race considerably, but does not solve it. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001784 if (side == C_SYNC_SOURCE) {
1785 struct net_conf *nc;
1786 int timeo;
1787
1788 rcu_read_lock();
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001789 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001790 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1791 rcu_read_unlock();
1792 schedule_timeout_interruptible(timeo);
1793 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001794 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001795 }
1796
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001797 drbd_rs_controller_reset(device);
1798 /* ns.conn may already be != device->state.conn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001799 * we may have been paused in between, or become paused until
1800 * the timer triggers.
1801 * No matter, that is handled in resync_timer_fn() */
1802 if (ns.conn == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001803 mod_timer(&device->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001805 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001806 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001807 put_ldev(device);
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001808out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001809 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001810}
1811
Lars Ellenberge334f552014-02-11 09:30:49 +01001812static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001813{
1814 struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1815 device->rs_last_bcast = jiffies;
1816
1817 if (!get_ldev(device))
1818 return;
1819
1820 drbd_bm_write_lazy(device, 0);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001821 if (resync_done && is_sync_state(device->state.conn))
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001822 drbd_resync_finished(device);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001823
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001824 drbd_bcast_event(device, &sib);
1825 /* update timestamp, in case it took a while to write out stuff */
1826 device->rs_last_bcast = jiffies;
1827 put_ldev(device);
1828}
1829
Lars Ellenberge334f552014-02-11 09:30:49 +01001830static void drbd_ldev_destroy(struct drbd_device *device)
1831{
1832 lc_destroy(device->resync);
1833 device->resync = NULL;
1834 lc_destroy(device->act_log);
1835 device->act_log = NULL;
Andreas Gruenbacherd1b80852014-09-11 14:29:09 +02001836
1837 __acquire(local);
1838 drbd_free_ldev(device->ldev);
1839 device->ldev = NULL;
1840 __release(local);
1841
Lars Ellenberge334f552014-02-11 09:30:49 +01001842 clear_bit(GOING_DISKLESS, &device->flags);
1843 wake_up(&device->misc_wait);
1844}
1845
1846static void go_diskless(struct drbd_device *device)
1847{
1848 D_ASSERT(device, device->state.disk == D_FAILED);
1849 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1850 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1851 * the protected members anymore, though, so once put_ldev reaches zero
1852 * again, it will be safe to free them. */
1853
1854 /* Try to write changed bitmap pages, read errors may have just
1855 * set some bits outside the area covered by the activity log.
1856 *
1857 * If we have an IO error during the bitmap writeout,
1858 * we will want a full sync next time, just in case.
1859 * (Do we want a specific meta data flag for this?)
1860 *
1861 * If that does not make it to stable storage either,
1862 * we cannot do anything about that anymore.
1863 *
1864 * We still need to check if both bitmap and ldev are present, we may
1865 * end up here after a failed attach, before ldev was even assigned.
1866 */
1867 if (device->bitmap && device->ldev) {
1868		/* An interrupted resync or similar is allowed to recount bits
1869 * while we detach.
1870 * Any modifications would not be expected anymore, though.
1871 */
1872 if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1873 "detach", BM_LOCKED_TEST_ALLOWED)) {
1874 if (test_bit(WAS_READ_ERROR, &device->flags)) {
1875 drbd_md_set_flag(device, MDF_FULL_SYNC);
1876 drbd_md_sync(device);
1877 }
1878 }
1879 }
1880
1881 drbd_force_state(device, NS(disk, D_DISKLESS));
1882}
1883
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001884static int do_md_sync(struct drbd_device *device)
1885{
1886 drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
1887 drbd_md_sync(device);
1888 return 0;
1889}
1890
Lars Ellenberg944410e2014-05-06 15:02:05 +02001891/* only called from drbd_worker thread, no locking */
1892void __update_timing_details(
1893 struct drbd_thread_timing_details *tdp,
1894 unsigned int *cb_nr,
1895 void *cb,
1896 const char *fn, const unsigned int line)
1897{
1898 unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
1899 struct drbd_thread_timing_details *td = tdp + i;
1900
1901 td->start_jif = jiffies;
1902 td->cb_addr = cb;
1903 td->caller_fn = fn;
1904 td->line = line;
1905 td->cb_nr = *cb_nr;
1906
1907 i = (i+1) % DRBD_THREAD_DETAILS_HIST;
1908 td = tdp + i;
1909 memset(td, 0, sizeof(*td));
1910
1911 ++(*cb_nr);
1912}
1913
Lars Ellenberge334f552014-02-11 09:30:49 +01001914static void do_device_work(struct drbd_device *device, const unsigned long todo)
1915{
Andreas Gruenbacherb47a06d2014-09-11 14:29:10 +02001916 if (test_bit(MD_SYNC, &todo))
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001917 do_md_sync(device);
Andreas Gruenbacherb47a06d2014-09-11 14:29:10 +02001918 if (test_bit(RS_DONE, &todo) ||
1919 test_bit(RS_PROGRESS, &todo))
1920 update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
1921 if (test_bit(GO_DISKLESS, &todo))
Lars Ellenberge334f552014-02-11 09:30:49 +01001922 go_diskless(device);
Andreas Gruenbacherb47a06d2014-09-11 14:29:10 +02001923 if (test_bit(DESTROY_DISK, &todo))
Lars Ellenberge334f552014-02-11 09:30:49 +01001924 drbd_ldev_destroy(device);
Andreas Gruenbacherb47a06d2014-09-11 14:29:10 +02001925 if (test_bit(RS_START, &todo))
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001926 do_start_resync(device);
Lars Ellenberge334f552014-02-11 09:30:49 +01001927}
1928
1929#define DRBD_DEVICE_WORK_MASK \
1930 ((1UL << GO_DISKLESS) \
1931 |(1UL << DESTROY_DISK) \
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001932 |(1UL << MD_SYNC) \
1933 |(1UL << RS_START) \
Lars Ellenberge334f552014-02-11 09:30:49 +01001934 |(1UL << RS_PROGRESS) \
1935 |(1UL << RS_DONE) \
1936 )
1937
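/* Atomically claim and clear the device work bits in *flags. The cmpxchg()
 * loop guarantees that bits set concurrently are either returned here or
 * left for a later call, but never lost. */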
1938static unsigned long get_work_bits(unsigned long *flags)
1939{
1940 unsigned long old, new;
1941 do {
1942 old = *flags;
1943 new = old & ~DRBD_DEVICE_WORK_MASK;
1944 } while (cmpxchg(flags, old, new) != old);
1945 return old & DRBD_DEVICE_WORK_MASK;
1946}
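
/* The producer side is, roughly, a sketch like the following (the real
 * helper lives in drbd_int.h; names here are from memory and may differ):
 *
 *	static inline void
 *	drbd_device_post_work(struct drbd_device *device, int work_bit)
 *	{
 *		if (!test_and_set_bit(work_bit, &device->flags)) {
 *			struct drbd_connection *connection =
 *				first_peer_device(device)->connection;
 *
 *			if (!test_and_set_bit(DEVICE_WORK_PENDING, &connection->flags))
 *				wake_up(&connection->sender_work.q_wait);
 *		}
 *	}
 *
 * get_work_bits() above is the matching consumer: the cmpxchg() loop
 * clears all work bits atomically in one step, so a bit posted while the
 * previous batch is being processed is either seen now or picked up on
 * the next DEVICE_WORK_PENDING round, but never lost. */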
1947
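/* Run pending device work for all volumes of this connection.  Only the
 * RCU read lock is held while iterating the idr; a kref on the device
 * allows dropping it around the (possibly blocking) work itself. */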
1948static void do_unqueued_work(struct drbd_connection *connection)
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001949{
1950 struct drbd_peer_device *peer_device;
1951 int vnr;
1952
1953 rcu_read_lock();
1954 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1955 struct drbd_device *device = peer_device->device;
Lars Ellenberge334f552014-02-11 09:30:49 +01001956 unsigned long todo = get_work_bits(&device->flags);
1957 if (!todo)
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001958 continue;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001959
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001960 kref_get(&device->kref);
1961 rcu_read_unlock();
Lars Ellenberge334f552014-02-11 09:30:49 +01001962 do_device_work(device, todo);
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001963 kref_put(&device->kref, drbd_destroy_device);
1964 rcu_read_lock();
1965 }
1966 rcu_read_unlock();
1967}
1968
Rashika Kheriaa186e472013-12-19 15:06:10 +05301969static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001970{
1971 spin_lock_irq(&queue->q_lock);
Lars Ellenberg15e26f62014-04-28 11:43:21 +02001972 list_splice_tail_init(&queue->q, work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001973 spin_unlock_irq(&queue->q_lock);
1974 return !list_empty(work_list);
1975}
1976
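/* Wait until there is work for the worker thread.  While waiting, uncork
 * the data socket so anything already queued goes out, send the epoch
 * closing barrier if one is due, and cork the socket again (if tcp_cork
 * is configured) before returning, so the next batch can be aggregated. */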
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001977static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001978{
1979 DEFINE_WAIT(wait);
1980 struct net_conf *nc;
1981 int uncork, cork;
1982
Lars Ellenbergabde9cc2014-09-11 14:29:11 +02001983 dequeue_work_batch(&connection->sender_work, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001984 if (!list_empty(work_list))
1985 return;
1986
1987 /* Still nothing to do?
1988 * Maybe we still need to close the current epoch,
1989 * even if no new requests are queued yet.
1990 *
1991 * Also, poke TCP, just in case.
1992 * Then wait for new work (or signal). */
1993 rcu_read_lock();
1994 nc = rcu_dereference(connection->net_conf);
1995 uncork = nc ? nc->tcp_cork : 0;
1996 rcu_read_unlock();
1997 if (uncork) {
1998 mutex_lock(&connection->data.mutex);
1999 if (connection->data.socket)
2000 drbd_tcp_uncork(connection->data.socket);
2001 mutex_unlock(&connection->data.mutex);
2002 }
2003
2004 for (;;) {
2005 int send_barrier;
2006 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002007 spin_lock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002008 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
Lars Ellenbergbc317a92012-08-22 11:47:14 +02002009 if (!list_empty(&connection->sender_work.q))
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01002010 list_splice_tail_init(&connection->sender_work.q, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002011 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
2012 if (!list_empty(work_list) || signal_pending(current)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002013 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002014 break;
2015 }
Lars Ellenbergf9c78122014-04-28 18:43:29 +02002016
2017 /* We found nothing new to do, no to-be-communicated request,
2018 * no other work item. We may still need to close the last
2019 * epoch. Next incoming request epoch will be connection ->
2020 * current transfer log epoch number. If that is different
2021 * from the epoch of the last request we communicated, it is
2022 * safe to send the epoch separating barrier now.
2023 */
2024 send_barrier =
2025 atomic_read(&connection->current_tle_nr) !=
2026 connection->send.current_epoch_nr;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002027 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergf9c78122014-04-28 18:43:29 +02002028
2029 if (send_barrier)
2030 maybe_send_barrier(connection,
2031 connection->send.current_epoch_nr + 1);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002032
Lars Ellenberge334f552014-02-11 09:30:49 +01002033 if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002034 break;
2035
Lars Ellenberga80ca1a2013-12-27 17:17:25 +01002036 /* drbd_send() may have called flush_signals() */
2037 if (get_t_state(&connection->worker) != RUNNING)
2038 break;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002039
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002040 schedule();
2041		/* may be woken up for things other than new work, too,
2042		 * e.g. if the current epoch got closed,
2043		 * in which case we send the barrier above. */
2044 }
2045 finish_wait(&connection->sender_work.q_wait, &wait);
2046
2047 /* someone may have changed the config while we have been waiting above. */
2048 rcu_read_lock();
2049 nc = rcu_dereference(connection->net_conf);
2050 cork = nc ? nc->tcp_cork : 0;
2051 rcu_read_unlock();
2052 mutex_lock(&connection->data.mutex);
2053 if (connection->data.socket) {
2054 if (cork)
2055 drbd_tcp_cork(connection->data.socket);
2056 else if (!uncork)
2057 drbd_tcp_uncork(connection->data.socket);
2058 }
2059 mutex_unlock(&connection->data.mutex);
2060}
2061
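/* Main loop of the per-connection worker thread: pull batches of work
 * items off sender_work, run per-device work flagged via the device work
 * bits (bitmap writeout, meta data sync, resync start, detach), and treat
 * a failing work callback while connected as a network failure.  Once the
 * thread is asked to stop, the tail of the function drains the remaining
 * work in cancel mode and cleans up all volumes. */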
Philipp Reisnerb411b362009-09-25 16:07:19 -07002062int drbd_worker(struct drbd_thread *thi)
2063{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002064 struct drbd_connection *connection = thi->connection;
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002065 struct drbd_work *w = NULL;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02002066 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002067 LIST_HEAD(work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002068 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002069
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01002070 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01002071 drbd_thread_current_set_cpu(thi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002072
Lars Ellenberg944410e2014-05-06 15:02:05 +02002073 if (list_empty(&work_list)) {
2074 update_worker_timing_details(connection, wait_for_work);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002075 wait_for_work(connection, &work_list);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002076 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002077
Lars Ellenberg944410e2014-05-06 15:02:05 +02002078 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2079 update_worker_timing_details(connection, do_unqueued_work);
Lars Ellenberge334f552014-02-11 09:30:49 +01002080 do_unqueued_work(connection);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002081 }
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002082
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002083 if (signal_pending(current)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002084 flush_signals(current);
Philipp Reisner19393e12011-02-09 10:09:07 +01002085 if (get_t_state(thi) == RUNNING) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02002086 drbd_warn(connection, "Worker got an unexpected signal\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002087 continue;
Philipp Reisner19393e12011-02-09 10:09:07 +01002088 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002089 break;
2090 }
2091
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01002092 if (get_t_state(thi) != RUNNING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002093 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002094
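		/* The callback's second argument is "cancel": below
		 * C_WF_REPORT_PARAMS the work is run in cancel mode.  A
		 * non-zero return while connected is treated as fatal and
		 * forces the connection into C_NETWORK_FAILURE. */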
Lars Ellenberg729e8b82014-09-11 14:29:12 +02002095 if (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002096 w = list_first_entry(&work_list, struct drbd_work, list);
2097 list_del_init(&w->list);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002098 update_worker_timing_details(connection, w->cb);
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002099 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002100 continue;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002101 if (connection->cstate >= C_WF_REPORT_PARAMS)
2102 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002103 }
2104 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002105
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002106 do {
Lars Ellenberg944410e2014-05-06 15:02:05 +02002107 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2108 update_worker_timing_details(connection, do_unqueued_work);
Lars Ellenberge334f552014-02-11 09:30:49 +01002109 do_unqueued_work(connection);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002110 }
Lars Ellenberg729e8b82014-09-11 14:29:12 +02002111 if (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002112 w = list_first_entry(&work_list, struct drbd_work, list);
2113 list_del_init(&w->list);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002114 update_worker_timing_details(connection, w->cb);
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002115 w->cb(w, 1);
Lars Ellenberg729e8b82014-09-11 14:29:12 +02002116 } else
2117 dequeue_work_batch(&connection->sender_work, &work_list);
Lars Ellenberge334f552014-02-11 09:30:49 +01002118 } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002119
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002120 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02002121 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2122 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002123 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002124 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002125 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002126 drbd_device_cleanup(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02002127 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002128 rcu_read_lock();
Philipp Reisner0e29d162011-02-18 14:23:11 +01002129 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002130 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002131
2132 return 0;
2133}