blob: 9c89ebe21c6bfbba184275d76fe54cbc7ca798d5 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_worker.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +020024*/
Philipp Reisnerb411b362009-09-25 16:07:19 -070025
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070027#include <linux/drbd.h>
28#include <linux/sched.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070029#include <linux/wait.h>
30#include <linux/mm.h>
31#include <linux/memcontrol.h>
32#include <linux/mm_inline.h>
33#include <linux/slab.h>
34#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070035#include <linux/string.h>
36#include <linux/scatterlist.h>
37
38#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020039#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070040#include "drbd_req.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070041
Andreas Gruenbacherd448a2e2011-08-25 16:59:58 +020042static int make_ov_request(struct drbd_device *, int);
43static int make_resync_request(struct drbd_device *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070044
/* endio handlers:
 *   drbd_md_endio (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   drbd_bm_endio (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */
57
Philipp Reisnerb411b362009-09-25 16:07:19 -070058/* used for synchronous meta data and bitmap IO
59 * submitted by drbd_md_sync_page_io()
60 */
Christoph Hellwig4246a0b2015-07-20 15:29:37 +020061void drbd_md_endio(struct bio *bio)
Philipp Reisnerb411b362009-09-25 16:07:19 -070062{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020063 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -070064
Lars Ellenberge37d2432014-04-01 23:53:30 +020065 device = bio->bi_private;
Christoph Hellwig4246a0b2015-07-20 15:29:37 +020066 device->md_io.error = bio->bi_error;
Philipp Reisnerb411b362009-09-25 16:07:19 -070067
Philipp Reisner0cfac5d2011-11-10 12:12:52 +010068 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
69 * to timeout on the lower level device, and eventually detach from it.
70 * If this io completion runs after that timeout expired, this
71 * drbd_md_put_buffer() may allow us to finally try and re-attach.
72 * During normal operation, this only puts that extra reference
73 * down to 1 again.
74 * Make sure we first drop the reference, and only then signal
75 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
76 * next drbd_md_sync_page_io(), that we trigger the
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020077 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
Philipp Reisner0cfac5d2011-11-10 12:12:52 +010078 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020079 drbd_md_put_buffer(device);
Lars Ellenberge37d2432014-04-01 23:53:30 +020080 device->md_io.done = 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020081 wake_up(&device->misc_wait);
Philipp Reisnercdfda632011-07-05 15:38:59 +020082 bio_put(bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020083 if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
84 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -070085}
86
87/* reads on behalf of the partner,
88 * "submitted" by the receiver
89 */
Rashika Kheriaa186e472013-12-19 15:06:10 +053090static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -070091{
92 unsigned long flags = 0;
Andreas Gruenbacher67801392011-09-13 10:39:41 +020093 struct drbd_peer_device *peer_device = peer_req->peer_device;
94 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -070095
Andreas Gruenbacher05008132011-07-07 14:19:42 +020096 spin_lock_irqsave(&device->resource->req_lock, flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020097 device->read_cnt += peer_req->i.size >> 9;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +020098 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020099 if (list_empty(&device->read_ee))
100 wake_up(&device->ee_wait);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100101 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200102 __drbd_chk_io_error(device, DRBD_READ_ERROR);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200103 spin_unlock_irqrestore(&device->resource->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700104
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200105 drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200106 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700107}
108
109/* writes on behalf of the partner, or resync writes,
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200110 * "submitted" by the receiver, final stage. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200111void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700112{
113 unsigned long flags = 0;
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200114 struct drbd_peer_device *peer_device = peer_req->peer_device;
115 struct drbd_device *device = peer_device->device;
Lars Ellenberg181286a2011-03-31 15:18:56 +0200116 struct drbd_interval i;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700117 int do_wake;
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +0100118 u64 block_id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700119 int do_al_complete_io;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700120
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100121 /* after we moved peer_req to done_ee,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700122 * we may no longer access it,
123 * it may be freed/reused already!
124 * (as soon as we release the req_lock) */
Lars Ellenberg181286a2011-03-31 15:18:56 +0200125 i = peer_req->i;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100126 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
127 block_id = peer_req->block_id;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +0200128 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700129
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200130 spin_lock_irqsave(&device->resource->req_lock, flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200131 device->writ_cnt += peer_req->i.size >> 9;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200132 list_move_tail(&peer_req->w.list, &device->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700133
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +0100134 /*
Andreas Gruenbacher5e472262011-01-27 14:42:51 +0100135 * Do not remove from the write_requests tree here: we did not send the
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +0100136 * Ack yet and did not wake possibly waiting conflicting requests.
137 * Removed from the tree from "drbd_process_done_ee" within the
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200138 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +0100139 * _drbd_clear_done_ee.
140 */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700141
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200142 do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700143
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200144 /* FIXME do we want to detach for failed REQ_DISCARD?
145 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
146 if (peer_req->flags & EE_WAS_ERROR)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200147 __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200148 spin_unlock_irqrestore(&device->resource->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700149
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +0100150 if (block_id == ID_SYNCER)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200151 drbd_rs_complete_io(device, i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700152
153 if (do_wake)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200154 wake_up(&device->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700155
156 if (do_al_complete_io)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200157 drbd_al_complete_io(device, &i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700158
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200159 wake_asender(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200160 put_ldev(device);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200161}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700162
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200163/* writes on behalf of the partner, or resync writes,
164 * "submitted" by the receiver.
165 */
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200166void drbd_peer_request_endio(struct bio *bio)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200167{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100168 struct drbd_peer_request *peer_req = bio->bi_private;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200169 struct drbd_device *device = peer_req->peer_device->device;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200170 int is_write = bio_data_dir(bio) == WRITE;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200171 int is_discard = !!(bio->bi_rw & REQ_DISCARD);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200172
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200173 if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200174 drbd_warn(device, "%s: error=%d s=%llus\n",
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200175 is_write ? (is_discard ? "discard" : "write")
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200176 : "read", bio->bi_error,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100177 (unsigned long long)peer_req->i.sector);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200178
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200179 if (bio->bi_error)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100180 set_bit(__EE_WAS_ERROR, &peer_req->flags);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200181
182 bio_put(bio); /* no need for the bio anymore */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100183 if (atomic_dec_and_test(&peer_req->pending_bios)) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200184 if (is_write)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100185 drbd_endio_write_sec_final(peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200186 else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100187 drbd_endio_read_sec_final(peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200188 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700189}
190
Lars Ellenberg142207f2015-02-19 13:48:59 +0100191void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
192{
193 panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
194 device->minor, device->resource->name, device->vnr);
195}
196
Philipp Reisnerb411b362009-09-25 16:07:19 -0700197/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
198 */
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200199void drbd_request_endio(struct bio *bio)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700200{
Lars Ellenberga1154132010-11-13 20:42:29 +0100201 unsigned long flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700202 struct drbd_request *req = bio->bi_private;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200203 struct drbd_device *device = req->device;
Lars Ellenberga1154132010-11-13 20:42:29 +0100204 struct bio_and_error m;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205 enum drbd_req_event what;
Philipp Reisner1b6dd252012-09-04 15:16:20 +0200206
207 /* If this request was aborted locally before,
208 * but now was completed "successfully",
209 * chances are that this caused arbitrary data corruption.
210 *
211 * "aborting" requests, or force-detaching the disk, is intended for
212 * completely blocked/hung local backing devices which do no longer
213 * complete requests at all, not even do error completions. In this
214 * situation, usually a hard-reset and failover is the only way out.
215 *
216 * By "aborting", basically faking a local error-completion,
217 * we allow for a more graceful swichover by cleanly migrating services.
218 * Still the affected node has to be rebooted "soon".
219 *
220 * By completing these requests, we allow the upper layers to re-use
221 * the associated data pages.
222 *
223 * If later the local backing device "recovers", and now DMAs some data
224 * from disk into the original request pages, in the best case it will
225 * just put random data into unused pages; but typically it will corrupt
226 * meanwhile completely unrelated data, causing all sorts of damage.
227 *
228 * Which means delayed successful completion,
229 * especially for READ requests,
230 * is a reason to panic().
231 *
232 * We assume that a delayed *error* completion is OK,
233 * though we still will complain noisily about it.
234 */
235 if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
236 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200237 drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
Philipp Reisner1b6dd252012-09-04 15:16:20 +0200238
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200239 if (!bio->bi_error)
Lars Ellenberg142207f2015-02-19 13:48:59 +0100240 drbd_panic_after_delayed_completion_of_aborted_request(device);
Philipp Reisner1b6dd252012-09-04 15:16:20 +0200241 }
242
Philipp Reisnerb411b362009-09-25 16:07:19 -0700243 /* to avoid recursion in __req_mod */
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200244 if (unlikely(bio->bi_error)) {
Lars Ellenberg2f632ae2014-04-28 18:43:24 +0200245 if (bio->bi_rw & REQ_DISCARD)
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200246 what = (bio->bi_error == -EOPNOTSUPP)
Lars Ellenberg2f632ae2014-04-28 18:43:24 +0200247 ? DISCARD_COMPLETED_NOTSUPP
248 : DISCARD_COMPLETED_WITH_ERROR;
249 else
250 what = (bio_data_dir(bio) == WRITE)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100251 ? WRITE_COMPLETED_WITH_ERROR
Lars Ellenberg5c3c7e62010-04-10 02:10:09 +0200252 : (bio_rw(bio) == READ)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100253 ? READ_COMPLETED_WITH_ERROR
254 : READ_AHEAD_COMPLETED_WITH_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700255 } else
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100256 what = COMPLETED_OK;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700257
258 bio_put(req->private_bio);
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200259 req->private_bio = ERR_PTR(bio->bi_error);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700260
Lars Ellenberga1154132010-11-13 20:42:29 +0100261 /* not req_mod(), we need irqsave here! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200262 spin_lock_irqsave(&device->resource->req_lock, flags);
Lars Ellenberga1154132010-11-13 20:42:29 +0100263 __req_mod(req, what, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200264 spin_unlock_irqrestore(&device->resource->req_lock, flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200265 put_ldev(device);
Lars Ellenberga1154132010-11-13 20:42:29 +0100266
267 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200268 complete_master_bio(device, &m);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700269}
270
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +0200271void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200272{
273 struct hash_desc desc;
274 struct scatterlist sg;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100275 struct page *page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200276 struct page *tmp;
277 unsigned len;
278
279 desc.tfm = tfm;
280 desc.flags = 0;
281
282 sg_init_table(&sg, 1);
283 crypto_hash_init(&desc);
284
285 while ((tmp = page_chain_next(page))) {
286 /* all but the last page will be fully used */
287 sg_set_page(&sg, page, PAGE_SIZE, 0);
288 crypto_hash_update(&desc, &sg, sg.length);
289 page = tmp;
290 }
291 /* and now the last, possibly only partially used page */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100292 len = peer_req->i.size & (PAGE_SIZE - 1);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200293 sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
294 crypto_hash_update(&desc, &sg, sg.length);
295 crypto_hash_final(&desc, digest);
296}
297
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +0200298void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700299{
300 struct hash_desc desc;
301 struct scatterlist sg;
Kent Overstreet79886132013-11-23 17:19:00 -0800302 struct bio_vec bvec;
303 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700304
305 desc.tfm = tfm;
306 desc.flags = 0;
307
308 sg_init_table(&sg, 1);
309 crypto_hash_init(&desc);
310
Kent Overstreet79886132013-11-23 17:19:00 -0800311 bio_for_each_segment(bvec, bio, iter) {
312 sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700313 crypto_hash_update(&desc, &sg, sg.length);
314 }
315 crypto_hash_final(&desc, digest);
316}
317
Lars Ellenberg9676c762011-02-22 14:02:31 +0100318/* MAYBE merge common code with w_e_end_ov_req */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100319static int w_e_send_csum(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700320{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200321 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200322 struct drbd_peer_device *peer_device = peer_req->peer_device;
323 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700324 int digest_size;
325 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100326 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700327
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100328 if (unlikely(cancel))
329 goto out;
330
Lars Ellenberg9676c762011-02-22 14:02:31 +0100331 if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100332 goto out;
333
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200334 digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100335 digest = kmalloc(digest_size, GFP_NOIO);
336 if (digest) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100337 sector_t sector = peer_req->i.sector;
338 unsigned int size = peer_req->i.size;
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200339 drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
Lars Ellenberg9676c762011-02-22 14:02:31 +0100340 /* Free peer_req and pages before send.
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100341 * In case we block on congestion, we could otherwise run into
342 * some distributed deadlock, if the other side blocks on
343 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200344 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200345 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100346 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200347 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200348 err = drbd_send_drequest_csum(peer_device, sector, size,
Andreas Gruenbacherdb1b0b72011-03-16 01:37:21 +0100349 digest, digest_size,
350 P_CSUM_RS_REQUEST);
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100351 kfree(digest);
352 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200353 drbd_err(device, "kmalloc() of digest failed.\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100354 err = -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700355 }
356
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100357out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100358 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200359 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700360
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100361 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200362 drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100363 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700364}
365
366#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
367
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200368static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700369{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200370 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100371 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700372
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200373 if (!get_ldev(device))
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200374 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700375
376 /* GFP_TRY, because if there is no memory available right now, this may
377 * be rescheduled for later. It is "only" background resync, after all. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200378 peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200379 size, true /* has real payload */, GFP_TRY);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100380 if (!peer_req)
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200381 goto defer;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700382
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200383 peer_req->w.cb = w_e_send_csum;
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200384 spin_lock_irq(&device->resource->req_lock);
Lars Ellenbergb9ed7082014-04-23 12:15:35 +0200385 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200386 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700387
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200388 atomic_add(size >> 9, &device->rs_sect_ev);
389 if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200390 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700391
Lars Ellenberg10f6d9922011-01-24 14:47:09 +0100392 /* If it failed because of ENOMEM, retry should help. If it failed
393 * because bio_add_page failed (probably broken lower level driver),
394 * retry may or may not help.
395 * If it does not, you may need to force disconnect. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200396 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200397 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200398 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +0200399
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200400 drbd_free_peer_req(device, peer_req);
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200401defer:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200402 put_ldev(device);
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200403 return -EAGAIN;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700404}
405
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100406int w_resync_timer(struct drbd_work *w, int cancel)
Philipp Reisner794abb72010-12-27 11:51:23 +0100407{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200408 struct drbd_device *device =
409 container_of(w, struct drbd_device, resync_work);
410
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200411 switch (device->state.conn) {
Philipp Reisner794abb72010-12-27 11:51:23 +0100412 case C_VERIFY_S:
Andreas Gruenbacherd448a2e2011-08-25 16:59:58 +0200413 make_ov_request(device, cancel);
Philipp Reisner794abb72010-12-27 11:51:23 +0100414 break;
415 case C_SYNC_TARGET:
Andreas Gruenbacherd448a2e2011-08-25 16:59:58 +0200416 make_resync_request(device, cancel);
Philipp Reisner794abb72010-12-27 11:51:23 +0100417 break;
418 }
419
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100420 return 0;
Philipp Reisner794abb72010-12-27 11:51:23 +0100421}
422
Philipp Reisnerb411b362009-09-25 16:07:19 -0700423void resync_timer_fn(unsigned long data)
424{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200425 struct drbd_device *device = (struct drbd_device *) data;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700426
Lars Ellenberg15e26f62014-04-28 11:43:21 +0200427 drbd_queue_work_if_unqueued(
428 &first_peer_device(device)->connection->sender_work,
429 &device->resync_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700430}
431
Philipp Reisner778f2712010-07-06 11:14:00 +0200432static void fifo_set(struct fifo_buffer *fb, int value)
433{
434 int i;
435
436 for (i = 0; i < fb->size; i++)
Philipp Reisnerf10f2622010-10-05 16:50:17 +0200437 fb->values[i] = value;
Philipp Reisner778f2712010-07-06 11:14:00 +0200438}
439
440static int fifo_push(struct fifo_buffer *fb, int value)
441{
442 int ov;
443
444 ov = fb->values[fb->head_index];
445 fb->values[fb->head_index++] = value;
446
447 if (fb->head_index >= fb->size)
448 fb->head_index = 0;
449
450 return ov;
451}
452
453static void fifo_add_val(struct fifo_buffer *fb, int value)
454{
455 int i;
456
457 for (i = 0; i < fb->size; i++)
458 fb->values[i] += value;
459}
460
Philipp Reisner9958c852011-05-03 16:19:31 +0200461struct fifo_buffer *fifo_alloc(int fifo_size)
462{
463 struct fifo_buffer *fb;
464
Lars Ellenberg8747d302012-09-26 14:22:40 +0200465 fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
Philipp Reisner9958c852011-05-03 16:19:31 +0200466 if (!fb)
467 return NULL;
468
469 fb->head_index = 0;
470 fb->size = fifo_size;
471 fb->total = 0;
472
473 return fb;
474}
475
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200476static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
Philipp Reisner778f2712010-07-06 11:14:00 +0200477{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +0200478 struct disk_conf *dc;
Lars Ellenberg7f34f612014-04-22 16:37:16 +0200479 unsigned int want; /* The number of sectors we want in-flight */
Philipp Reisner778f2712010-07-06 11:14:00 +0200480 int req_sect; /* Number of sectors to request in this turn */
Lars Ellenberg7f34f612014-04-22 16:37:16 +0200481 int correction; /* Number of sectors more we need in-flight */
Philipp Reisner778f2712010-07-06 11:14:00 +0200482 int cps; /* correction per invocation of drbd_rs_controller() */
483 int steps; /* Number of time steps to plan ahead */
484 int curr_corr;
485 int max_sect;
Philipp Reisner813472c2011-05-03 16:47:02 +0200486 struct fifo_buffer *plan;
Philipp Reisner778f2712010-07-06 11:14:00 +0200487
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200488 dc = rcu_dereference(device->ldev->disk_conf);
489 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner778f2712010-07-06 11:14:00 +0200490
Philipp Reisner813472c2011-05-03 16:47:02 +0200491 steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
Philipp Reisner778f2712010-07-06 11:14:00 +0200492
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200493 if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +0200494 want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
Philipp Reisner778f2712010-07-06 11:14:00 +0200495 } else { /* normal path */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +0200496 want = dc->c_fill_target ? dc->c_fill_target :
497 sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
Philipp Reisner778f2712010-07-06 11:14:00 +0200498 }
499
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200500 correction = want - device->rs_in_flight - plan->total;
Philipp Reisner778f2712010-07-06 11:14:00 +0200501
502 /* Plan ahead */
503 cps = correction / steps;
Philipp Reisner813472c2011-05-03 16:47:02 +0200504 fifo_add_val(plan, cps);
505 plan->total += cps * steps;
Philipp Reisner778f2712010-07-06 11:14:00 +0200506
507 /* What we do in this step */
Philipp Reisner813472c2011-05-03 16:47:02 +0200508 curr_corr = fifo_push(plan, 0);
509 plan->total -= curr_corr;
Philipp Reisner778f2712010-07-06 11:14:00 +0200510
511 req_sect = sect_in + curr_corr;
512 if (req_sect < 0)
513 req_sect = 0;
514
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +0200515 max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
Philipp Reisner778f2712010-07-06 11:14:00 +0200516 if (req_sect > max_sect)
517 req_sect = max_sect;
518
519 /*
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200520 drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200521 sect_in, device->rs_in_flight, want, correction,
522 steps, cps, device->rs_planed, curr_corr, req_sect);
Philipp Reisner778f2712010-07-06 11:14:00 +0200523 */
524
525 return req_sect;
526}
527
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200528static int drbd_rs_number_requests(struct drbd_device *device)
Lars Ellenberge65f4402010-11-05 10:04:07 +0100529{
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200530 unsigned int sect_in; /* Number of sectors that came in since the last turn */
531 int number, mxb;
532
533 sect_in = atomic_xchg(&device->rs_sect_in, 0);
534 device->rs_in_flight -= sect_in;
Philipp Reisner813472c2011-05-03 16:47:02 +0200535
536 rcu_read_lock();
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200537 mxb = drbd_get_max_buffers(device) / 2;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200538 if (rcu_dereference(device->rs_plan_s)->size) {
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200539 number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200540 device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
Lars Ellenberge65f4402010-11-05 10:04:07 +0100541 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200542 device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
543 number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
Lars Ellenberge65f4402010-11-05 10:04:07 +0100544 }
Philipp Reisner813472c2011-05-03 16:47:02 +0200545 rcu_read_unlock();
Lars Ellenberge65f4402010-11-05 10:04:07 +0100546
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200547 /* Don't have more than "max-buffers"/2 in-flight.
548 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
549 * potentially causing a distributed deadlock on congestion during
550 * online-verify or (checksum-based) resync, if max-buffers,
551 * socket buffer sizes and resync rate settings are mis-configured. */
Lars Ellenberg7f34f612014-04-22 16:37:16 +0200552
553 /* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
554 * mxb (as used here, and in drbd_alloc_pages on the peer) is
555 * "number of pages" (typically also 4k),
556 * but "rs_in_flight" is in "sectors" (512 Byte). */
557 if (mxb - device->rs_in_flight/8 < number)
558 number = mxb - device->rs_in_flight/8;
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200559
Lars Ellenberge65f4402010-11-05 10:04:07 +0100560 return number;
561}
562
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100563static int make_resync_request(struct drbd_device *const device, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700564{
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100565 struct drbd_peer_device *const peer_device = first_peer_device(device);
566 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700567 unsigned long bit;
568 sector_t sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200569 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100570 int max_bio_size;
Lars Ellenberge65f4402010-11-05 10:04:07 +0100571 int number, rollback_i, size;
Lars Ellenberg506afb62014-01-31 14:55:12 +0100572 int align, requeue = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +0200573 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700574
575 if (unlikely(cancel))
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100576 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700577
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200578 if (device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200579 /* empty resync? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200580 drbd_resync_finished(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100581 return 0;
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200582 }
583
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200584 if (!get_ldev(device)) {
585 /* Since we only need to access device->rsync a
586 get_ldev_if_state(device,D_FAILED) would be sufficient, but
Philipp Reisnerb411b362009-09-25 16:07:19 -0700587 to continue resync with a broken disk makes no sense at
588 all */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200589 drbd_err(device, "Disk broke down during resync!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100590 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700591 }
592
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200593 max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
594 number = drbd_rs_number_requests(device);
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200595 if (number <= 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +0200596 goto requeue;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700597
Philipp Reisnerb411b362009-09-25 16:07:19 -0700598 for (i = 0; i < number; i++) {
Lars Ellenberg506afb62014-01-31 14:55:12 +0100599 /* Stop generating RS requests when half of the send buffer is filled,
600 * but notify TCP that we'd like to have more space. */
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100601 mutex_lock(&connection->data.mutex);
602 if (connection->data.socket) {
Lars Ellenberg506afb62014-01-31 14:55:12 +0100603 struct sock *sk = connection->data.socket->sk;
604 int queued = sk->sk_wmem_queued;
605 int sndbuf = sk->sk_sndbuf;
606 if (queued > sndbuf / 2) {
607 requeue = 1;
608 if (sk->sk_socket)
609 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
610 }
611 } else
612 requeue = 1;
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100613 mutex_unlock(&connection->data.mutex);
Lars Ellenberg506afb62014-01-31 14:55:12 +0100614 if (requeue)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700615 goto requeue;
616
617next_sector:
618 size = BM_BLOCK_SIZE;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200619 bit = drbd_bm_find_next(device, device->bm_resync_fo);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700620
Lars Ellenberg4b0715f2010-12-14 15:13:04 +0100621 if (bit == DRBD_END_OF_BITMAP) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200622 device->bm_resync_fo = drbd_bm_bits(device);
623 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100624 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700625 }
626
627 sector = BM_BIT_TO_SECT(bit);
628
Lars Ellenbergad3fee72013-12-20 11:22:13 +0100629 if (drbd_try_rs_begin_io(device, sector)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200630 device->bm_resync_fo = bit;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700631 goto requeue;
632 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200633 device->bm_resync_fo = bit + 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700634
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200635 if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
636 drbd_rs_complete_io(device, sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700637 goto next_sector;
638 }
639
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100640#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
Philipp Reisnerb411b362009-09-25 16:07:19 -0700641 /* try to find some adjacent bits.
642 * we stop if we have already the maximum req size.
643 *
644 * Additionally always align bigger requests, in order to
645 * be prepared for all stripe sizes of software RAIDs.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700646 */
647 align = 1;
Philipp Reisnerd2074502010-07-22 15:27:27 +0200648 rollback_i = i;
Lars Ellenberg6377b922014-04-28 18:43:17 +0200649 while (i < number) {
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100650 if (size + BM_BLOCK_SIZE > max_bio_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700651 break;
652
653 /* Be always aligned */
654 if (sector & ((1<<(align+3))-1))
655 break;
656
657 /* do not cross extent boundaries */
658 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
659 break;
660 /* now, is it actually dirty, after all?
661 * caution, drbd_bm_test_bit is tri-state for some
662 * obscure reason; ( b == 0 ) would get the out-of-band
663 * only accidentally right because of the "oddly sized"
664 * adjustment below */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200665 if (drbd_bm_test_bit(device, bit+1) != 1)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700666 break;
667 bit++;
668 size += BM_BLOCK_SIZE;
669 if ((BM_BLOCK_SIZE << align) <= size)
670 align++;
671 i++;
672 }
673 /* if we merged some,
674 * reset the offset to start the next drbd_bm_find_next from */
675 if (size > BM_BLOCK_SIZE)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200676 device->bm_resync_fo = bit + 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700677#endif
678
679 /* adjust very last sectors, in case we are oddly sized */
680 if (sector + (size>>9) > capacity)
681 size = (capacity-sector)<<9;
Lars Ellenbergaaaba342014-03-18 12:30:09 +0100682
683 if (device->use_csums) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100684 switch (read_for_csum(peer_device, sector, size)) {
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200685 case -EIO: /* Disk failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200686 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100687 return -EIO;
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200688 case -EAGAIN: /* allocation failed, or ldev busy */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200689 drbd_rs_complete_io(device, sector);
690 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerd2074502010-07-22 15:27:27 +0200691 i = rollback_i;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700692 goto requeue;
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200693 case 0:
694 /* everything ok */
695 break;
696 default:
697 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700698 }
699 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100700 int err;
701
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200702 inc_rs_pending(device);
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100703 err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100704 sector, size, ID_SYNCER);
705 if (err) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200706 drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200707 dec_rs_pending(device);
708 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100709 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700710 }
711 }
712 }
713
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200714 if (device->bm_resync_fo >= drbd_bm_bits(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715 /* last syncer _request_ was sent,
716 * but the P_RS_DATA_REPLY not yet received. sync will end (and
717 * next sync group will resume), as soon as we receive the last
718 * resync data block, and the last bit is cleared.
719 * until then resync "work" is "inactive" ...
720 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200721 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100722 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700723 }
724
725 requeue:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200726 device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
727 mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
728 put_ldev(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100729 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730}
731
Andreas Gruenbacherd448a2e2011-08-25 16:59:58 +0200732static int make_ov_request(struct drbd_device *device, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700733{
734 int number, i, size;
735 sector_t sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200736 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200737 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738
739 if (unlikely(cancel))
740 return 1;
741
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200742 number = drbd_rs_number_requests(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700743
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200744 sector = device->ov_position;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745 for (i = 0; i < number; i++) {
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200746 if (sector >= capacity)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700747 return 1;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200748
749 /* We check for "finished" only in the reply path:
750 * w_e_end_ov_reply().
751 * We need to send at least one request out. */
752 stop_sector_reached = i > 0
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200753 && verify_can_do_stop_sector(device)
754 && sector >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200755 if (stop_sector_reached)
756 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700757
758 size = BM_BLOCK_SIZE;
759
Lars Ellenbergad3fee72013-12-20 11:22:13 +0100760 if (drbd_try_rs_begin_io(device, sector)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200761 device->ov_position = sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700762 goto requeue;
763 }
764
765 if (sector + (size>>9) > capacity)
766 size = (capacity-sector)<<9;
767
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200768 inc_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200769 if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200770 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700771 return 0;
772 }
773 sector += BM_SECT_PER_BIT;
774 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200775 device->ov_position = sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700776
777 requeue:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200778 device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200779 if (i == 0 || !stop_sector_reached)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200780 mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700781 return 1;
782}
783
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100784int w_ov_finished(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700785{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200786 struct drbd_device_work *dw =
787 container_of(w, struct drbd_device_work, w);
788 struct drbd_device *device = dw->device;
789 kfree(dw);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200790 ov_out_of_sync_print(device);
791 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700792
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100793 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700794}
795
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100796static int w_resync_finished(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700797{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200798 struct drbd_device_work *dw =
799 container_of(w, struct drbd_device_work, w);
800 struct drbd_device *device = dw->device;
801 kfree(dw);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700802
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200803 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700804
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100805 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700806}
807
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200808static void ping_peer(struct drbd_device *device)
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200809{
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200810 struct drbd_connection *connection = first_peer_device(device)->connection;
Philipp Reisner2a67d8b2011-02-09 14:10:32 +0100811
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200812 clear_bit(GOT_PING_ACK, &connection->flags);
813 request_ping(connection);
814 wait_event(connection->ping_wait,
815 test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200816}
817
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200818int drbd_resync_finished(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700819{
820 unsigned long db, dt, dbdt;
821 unsigned long n_oos;
822 union drbd_state os, ns;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200823 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700824 char *khelper_cmd = NULL;
Lars Ellenberg26525612010-11-05 09:56:33 +0100825 int verify_done = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700826
827 /* Remove all elements from the resync LRU. Since future actions
828 * might set bits in the (main) bitmap, then the entries in the
829 * resync LRU would be wrong. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200830 if (drbd_rs_del_all(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700831 /* In case this is not possible now, most probably because
832 * there are P_RS_DATA_REPLY Packets lingering on the worker's
833 * queue (or even the read operations for those packets
834 * is not finished by now). Retry in 100ms. */
835
Philipp Reisner20ee6392011-01-18 15:28:59 +0100836 schedule_timeout_interruptible(HZ / 10);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200837 dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
838 if (dw) {
839 dw->w.cb = w_resync_finished;
840 dw->device = device;
841 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
842 &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700843 return 1;
844 }
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200845 drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700846 }
847
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200848 dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700849 if (dt <= 0)
850 dt = 1;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200851
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200852 db = device->rs_total;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200853 /* adjust for verify start and stop sectors, respective reached position */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200854 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
855 db -= device->ov_left;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200856
Philipp Reisnerb411b362009-09-25 16:07:19 -0700857 dbdt = Bit2KB(db/dt);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200858 device->rs_paused /= HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700859
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200860 if (!get_ldev(device))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700861 goto out;
862
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200863 ping_peer(device);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200864
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200865 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200866 os = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700867
Lars Ellenberg26525612010-11-05 09:56:33 +0100868 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
869
Philipp Reisnerb411b362009-09-25 16:07:19 -0700870 /* This protects us against multiple calls (that can happen in the presence
871 of application IO), and against connectivity loss just before we arrive here. */
872 if (os.conn <= C_CONNECTED)
873 goto out_unlock;
874
875 ns = os;
876 ns.conn = C_CONNECTED;
877
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200878 drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200879 verify_done ? "Online verify" : "Resync",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200880 dt + device->rs_paused, device->rs_paused, dbdt);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700881
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200882 n_oos = drbd_bm_total_weight(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700883
884 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
885 if (n_oos) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200886 drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -0700887 n_oos, Bit2KB(1));
888 khelper_cmd = "out-of-sync";
889 }
890 } else {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200891 D_ASSERT(device, (n_oos - device->rs_failed) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700892
893 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
894 khelper_cmd = "after-resync-target";
895
Lars Ellenbergaaaba342014-03-18 12:30:09 +0100896 if (device->use_csums && device->rs_total) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200897 const unsigned long s = device->rs_same_csum;
898 const unsigned long t = device->rs_total;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700899 const int ratio =
900 (t == 0) ? 0 :
901 (t < 100000) ? ((s*100)/t) : (s/(t/100));
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200902 drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
Philipp Reisnerb411b362009-09-25 16:07:19 -0700903 "transferred %luK total %luK\n",
904 ratio,
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200905 Bit2KB(device->rs_same_csum),
906 Bit2KB(device->rs_total - device->rs_same_csum),
907 Bit2KB(device->rs_total));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700908 }
909 }
910
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200911 if (device->rs_failed) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200912 drbd_info(device, " %lu failed blocks\n", device->rs_failed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700913
914 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
915 ns.disk = D_INCONSISTENT;
916 ns.pdsk = D_UP_TO_DATE;
917 } else {
918 ns.disk = D_UP_TO_DATE;
919 ns.pdsk = D_INCONSISTENT;
920 }
921 } else {
922 ns.disk = D_UP_TO_DATE;
923 ns.pdsk = D_UP_TO_DATE;
924
925 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200926 if (device->p_uuid) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700927 int i;
928 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200929 _drbd_uuid_set(device, i, device->p_uuid[i]);
930 drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
931 _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700932 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200933 drbd_err(device, "device->p_uuid is NULL! BUG\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700934 }
935 }
936
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100937 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
938 /* for verify runs, we don't update uuids here,
939 * so there would be nothing to report. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200940 drbd_uuid_set_bm(device, 0UL);
941 drbd_print_uuids(device, "updated UUIDs");
942 if (device->p_uuid) {
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100943 /* Now the two UUID sets are equal, update what we
944 * know of the peer. */
945 int i;
946 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200947 device->p_uuid[i] = device->ldev->md.uuid[i];
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100948 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700949 }
950 }
951
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200952 _drbd_set_state(device, ns, CS_VERBOSE, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700953out_unlock:
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200954 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200955 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700956out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200957 device->rs_total = 0;
958 device->rs_failed = 0;
959 device->rs_paused = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200960
961 /* reset start sector, if we reached end of device */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200962 if (verify_done && device->ov_left == 0)
963 device->ov_start_sector = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700964
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200965 drbd_md_sync(device);
Lars Ellenberg13d42682010-10-13 17:37:54 +0200966
Philipp Reisnerb411b362009-09-25 16:07:19 -0700967 if (khelper_cmd)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200968 drbd_khelper(device, khelper_cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700969
970 return 1;
971}
972
973/* helper */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200974static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700975{
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200976 if (drbd_peer_req_has_active_page(peer_req)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700977 /* This might happen if sendpage() has not finished */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100978 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200979 atomic_add(i, &device->pp_in_use_by_net);
980 atomic_sub(i, &device->pp_in_use);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200981 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200982 list_add_tail(&peer_req->w.list, &device->net_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200983 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg435f0742010-09-06 12:30:25 +0200984 wake_up(&drbd_pp_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700985 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200986 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987}
988
989/**
990 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200991 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700992 * @w: work object.
993 * @cancel: The connection will be closed anyways
994 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100995int w_e_end_data_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700996{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200997 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +0200998 struct drbd_peer_device *peer_device = peer_req->peer_device;
999 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001000 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001001
1002 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001003 drbd_free_peer_req(device, peer_req);
1004 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001005 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001006 }
1007
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001008 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001009 err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001010 } else {
1011 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001012 drbd_err(device, "Sending NegDReply. sector=%llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001013 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001014
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001015 err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001016 }
1017
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001018 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001019
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001020 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001021
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001022 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001023 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001024 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001025}
1026
1027/**
Andreas Gruenbachera209b4a2011-08-17 12:43:25 +02001028 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
Philipp Reisnerb411b362009-09-25 16:07:19 -07001029 * @w: work object.
1030 * @cancel: The connection will be closed anyways
1031 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001032int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001033{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001034 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001035 struct drbd_peer_device *peer_device = peer_req->peer_device;
1036 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001037 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001038
1039 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001040 drbd_free_peer_req(device, peer_req);
1041 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001042 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001043 }
1044
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001045 if (get_ldev_if_state(device, D_FAILED)) {
1046 drbd_rs_complete_io(device, peer_req->i.sector);
1047 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001048 }
1049
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001050 if (device->state.conn == C_AHEAD) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001051 err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001052 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001053 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1054 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001055 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001056 } else {
1057 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001058 drbd_err(device, "Not sending RSDataReply, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001059 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001060 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001061 }
1062 } else {
1063 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001064 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001065 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001066
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001067 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001068
1069 /* update resync data with failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001070 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001071 }
1072
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001073 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001074
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001075 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001076
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001077 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001078 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001079 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001080}
1081
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001082int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001083{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001084 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001085 struct drbd_peer_device *peer_device = peer_req->peer_device;
1086 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001087 struct digest_info *di;
1088 int digest_size;
1089 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001090 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091
1092 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001093 drbd_free_peer_req(device, peer_req);
1094 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001095 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001096 }
1097
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001098 if (get_ldev(device)) {
1099 drbd_rs_complete_io(device, peer_req->i.sector);
1100 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001101 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001102
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001103 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001104
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001105 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001106 /* quick hack to try to avoid a race against reconfiguration.
1107 * a real fix would be much more involved,
1108 * introducing more locking mechanisms */
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001109 if (peer_device->connection->csums_tfm) {
1110 digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001111 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001112 digest = kmalloc(digest_size, GFP_NOIO);
1113 }
1114 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001115 drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001116 eq = !memcmp(digest, di->digest, digest_size);
1117 kfree(digest);
1118 }
1119
1120 if (eq) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001121 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001122 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001123 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001124 err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001125 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001126 inc_rs_pending(device);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001127 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1128 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001129 kfree(di);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001130 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001131 }
1132 } else {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001133 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001134 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001135 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001136 }
1137
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001138 dec_unacked(device);
1139 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001140
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001141 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001142 drbd_err(device, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001143 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144}
1145
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001146int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001147{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001148 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001149 struct drbd_peer_device *peer_device = peer_req->peer_device;
1150 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001151 sector_t sector = peer_req->i.sector;
1152 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001153 int digest_size;
1154 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001155 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001156
1157 if (unlikely(cancel))
1158 goto out;
1159
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001160 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001161 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001162 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001163 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001164 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001165 }
1166
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001167 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001168 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001169 else
1170 memset(digest, 0, digest_size);
1171
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001172 /* Free e and pages before send.
1173 * In case we block on congestion, we could otherwise run into
1174 * some distributed deadlock, if the other side blocks on
1175 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001176 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001177 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001178 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001179 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001180 err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001181 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001182 dec_rs_pending(device);
Philipp Reisner8f214202011-03-01 15:52:35 +01001183 kfree(digest);
1184
Philipp Reisnerb411b362009-09-25 16:07:19 -07001185out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001186 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001187 drbd_free_peer_req(device, peer_req);
1188 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001189 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001190}
1191
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001192void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001193{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001194 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1195 device->ov_last_oos_size += size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001197 device->ov_last_oos_start = sector;
1198 device->ov_last_oos_size = size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001200 drbd_set_out_of_sync(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201}
1202
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001203int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001204{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001205 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001206 struct drbd_peer_device *peer_device = peer_req->peer_device;
1207 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001209 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001210 sector_t sector = peer_req->i.sector;
1211 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001212 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001213 int err, eq = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001214 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001215
1216 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001217 drbd_free_peer_req(device, peer_req);
1218 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001219 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001220 }
1221
1222 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1223 * the resync lru has been cleaned up already */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001224 if (get_ldev(device)) {
1225 drbd_rs_complete_io(device, peer_req->i.sector);
1226 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001227 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001229 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001231 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001232 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001233 digest = kmalloc(digest_size, GFP_NOIO);
1234 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001235 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001237 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238 eq = !memcmp(digest, di->digest, digest_size);
1239 kfree(digest);
1240 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001241 }
1242
Lars Ellenberg9676c762011-02-22 14:02:31 +01001243 /* Free peer_req and pages before send.
1244 * In case we block on congestion, we could otherwise run into
1245 * some distributed deadlock, if the other side blocks on
1246 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001247 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001248 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001249 if (!eq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001250 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001251 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001252 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001253
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001254 err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001255 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001256
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001257 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001258
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001259 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001260
1261 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001262 if ((device->ov_left & 0x200) == 0x200)
1263 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001264
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001265 stop_sector_reached = verify_can_do_stop_sector(device) &&
1266 (sector + (size>>9)) >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001267
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001268 if (device->ov_left == 0 || stop_sector_reached) {
1269 ov_out_of_sync_print(device);
1270 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271 }
1272
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001273 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001274}
1275
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001276/* FIXME
1277 * We need to track the number of pending barrier acks,
1278 * and to be able to wait for them.
1279 * See also comment in drbd_adm_attach before drbd_suspend_io.
1280 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001281static int drbd_send_barrier(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001282{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001283 struct p_barrier *p;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001284 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001285
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001286 sock = &connection->data;
1287 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001288 if (!p)
1289 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001290 p->barrier = connection->send.current_epoch_nr;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001291 p->pad = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001292 connection->send.current_epoch_writes = 0;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001293
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001294 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295}
1296
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001297int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001298{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001299 struct drbd_device *device =
1300 container_of(w, struct drbd_device, unplug_work);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001301 struct drbd_socket *sock;
1302
Philipp Reisnerb411b362009-09-25 16:07:19 -07001303 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001304 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001305 sock = &first_peer_device(device)->connection->data;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001306 if (!drbd_prepare_command(first_peer_device(device), sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001307 return -EIO;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001308 return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001309}
1310
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001311static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001312{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001313 if (!connection->send.seen_any_write_yet) {
1314 connection->send.seen_any_write_yet = true;
1315 connection->send.current_epoch_nr = epoch;
1316 connection->send.current_epoch_writes = 0;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001317 }
1318}
1319
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001320static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001321{
1322 /* re-init if first write on this connection */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001323 if (!connection->send.seen_any_write_yet)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001324 return;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001325 if (connection->send.current_epoch_nr != epoch) {
1326 if (connection->send.current_epoch_writes)
1327 drbd_send_barrier(connection);
1328 connection->send.current_epoch_nr = epoch;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001329 }
1330}
1331
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001332int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001333{
1334 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001335 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001336 struct drbd_peer_device *const peer_device = first_peer_device(device);
1337 struct drbd_connection *const connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001338 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001339
1340 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001341 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001342 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001343 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001344 req->pre_send_jif = jiffies;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001345
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001346 /* this time, no connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001347 * If it was sent, it was the closing barrier for the last
1348 * replicated epoch, before we went into AHEAD mode.
1349 * No more barriers will be sent, until we leave AHEAD mode again. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001350 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001351
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001352 err = drbd_send_out_of_sync(peer_device, req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001353 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001354
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001355 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001356}
1357
Philipp Reisnerb411b362009-09-25 16:07:19 -07001358/**
1359 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001360 * @w: work object.
1361 * @cancel: The connection will be closed anyways
1362 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001363int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001364{
1365 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001366 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001367 struct drbd_peer_device *const peer_device = first_peer_device(device);
1368 struct drbd_connection *connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001369 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001370
1371 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001372 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001373 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001374 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001375 req->pre_send_jif = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001376
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001377 re_init_if_first_write(connection, req->epoch);
1378 maybe_send_barrier(connection, req->epoch);
1379 connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001380
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001381 err = drbd_send_dblock(peer_device, req);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001382 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001383
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001384 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385}
1386
1387/**
1388 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
Philipp Reisnerb411b362009-09-25 16:07:19 -07001389 * @w: work object.
1390 * @cancel: The connection will be closed anyways
1391 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001392int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001393{
1394 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001395 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001396 struct drbd_peer_device *const peer_device = first_peer_device(device);
1397 struct drbd_connection *connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001398 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001399
1400 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001401 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001402 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001403 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001404 req->pre_send_jif = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001405
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001406 /* Even read requests may close a write epoch,
1407 * if there was any yet. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001408 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001409
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001410 err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001411 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001412
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001413 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001414
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001415 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001416}
1417
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001418int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001419{
1420 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001421 struct drbd_device *device = req->device;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001422
Philipp Reisner07782862010-08-31 12:00:50 +02001423 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01001424 drbd_al_begin_io(device, &req->i);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001425
1426 drbd_req_make_private_bio(req, req->master_bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001427 req->private_bio->bi_bdev = device->ldev->backing_bdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001428 generic_make_request(req->private_bio);
1429
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001430 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001431}
1432
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001433static int _drbd_may_sync_now(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001434{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001435 struct drbd_device *odev = device;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001436 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437
1438 while (1) {
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001439 if (!odev->ldev || odev->state.disk == D_DISKLESS)
Philipp Reisner438c8372011-03-28 14:48:01 +02001440 return 1;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001441 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001442 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001443 rcu_read_unlock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001444 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001445 return 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001446 odev = minor_to_device(resync_after);
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001447 if (!odev)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001448 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001449 if ((odev->state.conn >= C_SYNC_SOURCE &&
1450 odev->state.conn <= C_PAUSED_SYNC_T) ||
1451 odev->state.aftr_isp || odev->state.peer_isp ||
1452 odev->state.user_isp)
1453 return 0;
1454 }
1455}
1456
1457/**
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001458 * drbd_pause_after() - Pause resync on all devices that may not resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001459 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001460 *
1461 * Called from process context only (admin command and after_state_ch).
1462 */
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001463static bool drbd_pause_after(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001465 bool changed = false;
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001466 struct drbd_device *odev;
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001467 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001468
Philipp Reisner695d08f2011-04-11 22:53:32 -07001469 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001470 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001471 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1472 continue;
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001473 if (!_drbd_may_sync_now(odev) &&
1474 _drbd_set_state(_NS(odev, aftr_isp, 1),
1475 CS_HARD, NULL) != SS_NOTHING_TO_DO)
1476 changed = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001477 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001478 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001480 return changed;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001481}
1482
1483/**
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001484 * drbd_resume_next() - Resume resync on all devices that may resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001485 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486 *
1487 * Called from process context only (admin command and worker).
1488 */
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001489static bool drbd_resume_next(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001491 bool changed = false;
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001492 struct drbd_device *odev;
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001493 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001494
Philipp Reisner695d08f2011-04-11 22:53:32 -07001495 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001496 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1498 continue;
1499 if (odev->state.aftr_isp) {
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001500 if (_drbd_may_sync_now(odev) &&
1501 _drbd_set_state(_NS(odev, aftr_isp, 0),
1502 CS_HARD, NULL) != SS_NOTHING_TO_DO)
1503 changed = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001504 }
1505 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001506 rcu_read_unlock();
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001507 return changed;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001508}
1509
/* Run drbd_resume_next() with all resources locked; its result is discarded. */
void resume_next_sg(struct drbd_device *device)
{
	lock_all_resources();
	(void)drbd_resume_next(device);
	unlock_all_resources();
}
1516
/* Run drbd_pause_after() with all resources locked; its result is discarded. */
void suspend_other_sg(struct drbd_device *device)
{
	lock_all_resources();
	(void)drbd_pause_after(device);
	unlock_all_resources();
}
1523
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001524/* caller must lock_all_resources() */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001525enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001526{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001527 struct drbd_device *odev;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001528 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001529
1530 if (o_minor == -1)
1531 return NO_ERROR;
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001532 if (o_minor < -1 || o_minor > MINORMASK)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001533 return ERR_RESYNC_AFTER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001534
1535 /* check for loops */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001536 odev = minor_to_device(o_minor);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001537 while (1) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001538 if (odev == device)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001539 return ERR_RESYNC_AFTER_CYCLE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001540
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001541 /* You are free to depend on diskless, non-existing,
1542 * or not yet/no longer existing minors.
1543 * We only reject dependency loops.
1544 * We cannot follow the dependency chain beyond a detached or
1545 * missing minor.
1546 */
1547 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1548 return NO_ERROR;
1549
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001550 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001551 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001552 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001553 /* dependency chain ends here, no cycles. */
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001554 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555 return NO_ERROR;
1556
1557 /* follow the dependency chain */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001558 odev = minor_to_device(resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559 }
1560}
1561
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001562/* caller must lock_all_resources() */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001563void drbd_resync_after_changed(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001565 int changed;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001567 do {
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001568 changed = drbd_pause_after(device);
1569 changed |= drbd_resume_next(device);
1570 } while (changed);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001571}
1572
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001573void drbd_rs_controller_reset(struct drbd_device *device)
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001574{
Lars Ellenbergff8bd882014-11-10 17:21:12 +01001575 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Philipp Reisner813472c2011-05-03 16:47:02 +02001576 struct fifo_buffer *plan;
1577
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001578 atomic_set(&device->rs_sect_in, 0);
1579 atomic_set(&device->rs_sect_ev, 0);
1580 device->rs_in_flight = 0;
Lars Ellenbergff8bd882014-11-10 17:21:12 +01001581 device->rs_last_events =
1582 (int)part_stat_read(&disk->part0, sectors[0]) +
1583 (int)part_stat_read(&disk->part0, sectors[1]);
Philipp Reisner813472c2011-05-03 16:47:02 +02001584
1585 /* Updating the RCU protected object in place is necessary since
1586 this function gets called from atomic context.
1587 It is valid since all other updates also lead to an completely
1588 empty fifo */
1589 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001590 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner813472c2011-05-03 16:47:02 +02001591 plan->total = 0;
1592 fifo_set(plan, 0);
1593 rcu_read_unlock();
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001594}
1595
Philipp Reisner1f04af32011-02-07 11:33:59 +01001596void start_resync_timer_fn(unsigned long data)
1597{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001598 struct drbd_device *device = (struct drbd_device *) data;
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001599 drbd_device_post_work(device, RS_START);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001600}
1601
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001602static void do_start_resync(struct drbd_device *device)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001603{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001604 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001605 drbd_warn(device, "postponing start_resync ...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001606 device->start_resync_timer.expires = jiffies + HZ/10;
1607 add_timer(&device->start_resync_timer);
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001608 return;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001609 }
1610
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001611 drbd_start_resync(device, C_SYNC_SOURCE);
1612 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001613}
1614
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001615static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1616{
1617 bool csums_after_crash_only;
1618 rcu_read_lock();
1619 csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1620 rcu_read_unlock();
1621 return connection->agreed_pro_version >= 89 && /* supported? */
1622 connection->csums_tfm && /* configured? */
1623 (csums_after_crash_only == 0 /* use for each resync? */
1624 || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */
1625}
1626
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627/**
1628 * drbd_start_resync() - Start the resync process
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001629 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1631 *
1632 * This function might bring you directly into one of the
1633 * C_PAUSED_SYNC_* states.
1634 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001635void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636{
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001637 struct drbd_peer_device *peer_device = first_peer_device(device);
1638 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001639 union drbd_state ns;
1640 int r;
1641
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001642 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001643 drbd_err(device, "Resync already running!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001644 return;
1645 }
1646
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001647 if (!test_bit(B_RS_H_DONE, &device->flags)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001648 if (side == C_SYNC_TARGET) {
1649 /* Since application IO was locked out during C_WF_BITMAP_T and
1650 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1651 we check that we might make the data inconsistent. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001652 r = drbd_khelper(device, "before-resync-target");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001653 r = (r >> 8) & 0xff;
1654 if (r > 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001655 drbd_info(device, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001656 "dropping connection.\n", r);
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001657 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001658 return;
1659 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001660 } else /* C_SYNC_SOURCE */ {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001661 r = drbd_khelper(device, "before-resync-source");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001662 r = (r >> 8) & 0xff;
1663 if (r > 0) {
1664 if (r == 3) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001665 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001666 "ignoring. Old userland tools?", r);
1667 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001668 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001669 "dropping connection.\n", r);
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001670 conn_request_state(connection,
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001671 NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001672 return;
1673 }
1674 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001675 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001676 }
1677
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001678 if (current == connection->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001679 /* The worker should not sleep waiting for state_mutex,
Philipp Reisnere64a3292011-02-05 17:34:11 +01001680 that can take long */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001681 if (!mutex_trylock(device->state_mutex)) {
1682 set_bit(B_RS_H_DONE, &device->flags);
1683 device->start_resync_timer.expires = jiffies + HZ/5;
1684 add_timer(&device->start_resync_timer);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001685 return;
1686 }
1687 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001688 mutex_lock(device->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001689 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001691 lock_all_resources();
1692 clear_bit(B_RS_H_DONE, &device->flags);
Philipp Reisnera7004712013-03-27 14:08:35 +01001693 /* Did some connection breakage or IO error race with us? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001694 if (device->state.conn < C_CONNECTED
1695 || !get_ldev_if_state(device, D_NEGOTIATING)) {
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001696 unlock_all_resources();
1697 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001698 }
1699
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001700 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001701
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001702 ns.aftr_isp = !_drbd_may_sync_now(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001703
1704 ns.conn = side;
1705
1706 if (side == C_SYNC_TARGET)
1707 ns.disk = D_INCONSISTENT;
1708 else /* side == C_SYNC_SOURCE */
1709 ns.pdsk = D_INCONSISTENT;
1710
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001711 r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001712 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001713
1714 if (ns.conn < C_CONNECTED)
1715 r = SS_UNKNOWN_ERROR;
1716
1717 if (r == SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001718 unsigned long tw = drbd_bm_total_weight(device);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001719 unsigned long now = jiffies;
1720 int i;
1721
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001722 device->rs_failed = 0;
1723 device->rs_paused = 0;
1724 device->rs_same_csum = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001725 device->rs_last_sect_ev = 0;
1726 device->rs_total = tw;
1727 device->rs_start = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001728 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001729 device->rs_mark_left[i] = tw;
1730 device->rs_mark_time[i] = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001731 }
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001732 drbd_pause_after(device);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001733 /* Forget potentially stale cached per resync extent bit-counts.
1734 * Open coded drbd_rs_cancel_all(device), we already have IRQs
1735 * disabled, and know the disk state is ok. */
1736 spin_lock(&device->al_lock);
1737 lc_reset(device->resync);
1738 device->resync_locked = 0;
1739 device->resync_wenr = LC_FREE;
1740 spin_unlock(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001741 }
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001742 unlock_all_resources();
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001743
Philipp Reisnerb411b362009-09-25 16:07:19 -07001744 if (r == SS_SUCCESS) {
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001745 wake_up(&device->al_wait); /* for lc_reset() above */
Philipp Reisner328e0f12012-10-19 14:37:47 +02001746 /* reset rs_last_bcast when a resync or verify is started,
1747 * to deal with potential jiffies wrap. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001748 device->rs_last_bcast = jiffies - HZ;
Philipp Reisner328e0f12012-10-19 14:37:47 +02001749
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001750 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001751 drbd_conn_str(ns.conn),
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001752 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1753 (unsigned long) device->rs_total);
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001754 if (side == C_SYNC_TARGET) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001755 device->bm_resync_fo = 0;
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001756 device->use_csums = use_checksum_based_resync(connection, device);
1757 } else {
1758 device->use_csums = 0;
1759 }
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001760
1761 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1762 * with w_send_oos, or the sync target will get confused as to
1763 * how much bits to resync. We cannot do that always, because for an
1764 * empty resync and protocol < 95, we need to do it here, as we call
1765 * drbd_resync_finished from here in that case.
1766 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1767 * and from after_state_ch otherwise. */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001768 if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
1769 drbd_gen_and_send_sync_uuid(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001770
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001771 if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001772 /* This still has a race (about when exactly the peers
1773 * detect connection loss) that can lead to a full sync
1774 * on next handshake. In 8.3.9 we fixed this with explicit
1775 * resync-finished notifications, but the fix
1776 * introduces a protocol change. Sleeping for some
1777 * time longer than the ping interval + timeout on the
1778 * SyncSource, to give the SyncTarget the chance to
1779 * detect connection loss, then waiting for a ping
1780 * response (implicit in drbd_resync_finished) reduces
1781 * the race considerably, but does not solve it. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001782 if (side == C_SYNC_SOURCE) {
1783 struct net_conf *nc;
1784 int timeo;
1785
1786 rcu_read_lock();
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001787 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001788 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1789 rcu_read_unlock();
1790 schedule_timeout_interruptible(timeo);
1791 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001792 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793 }
1794
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001795 drbd_rs_controller_reset(device);
1796 /* ns.conn may already be != device->state.conn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001797 * we may have been paused in between, or become paused until
1798 * the timer triggers.
1799 * No matter, that is handled in resync_timer_fn() */
1800 if (ns.conn == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001801 mod_timer(&device->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001802
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001803 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001805 put_ldev(device);
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001806out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001807 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808}
1809
Lars Ellenberge334f552014-02-11 09:30:49 +01001810static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001811{
1812 struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1813 device->rs_last_bcast = jiffies;
1814
1815 if (!get_ldev(device))
1816 return;
1817
1818 drbd_bm_write_lazy(device, 0);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001819 if (resync_done && is_sync_state(device->state.conn))
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001820 drbd_resync_finished(device);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001821
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001822 drbd_bcast_event(device, &sib);
1823 /* update timestamp, in case it took a while to write out stuff */
1824 device->rs_last_bcast = jiffies;
1825 put_ldev(device);
1826}
1827
Lars Ellenberge334f552014-02-11 09:30:49 +01001828static void drbd_ldev_destroy(struct drbd_device *device)
1829{
1830 lc_destroy(device->resync);
1831 device->resync = NULL;
1832 lc_destroy(device->act_log);
1833 device->act_log = NULL;
Andreas Gruenbacherd1b80852014-09-11 14:29:09 +02001834
1835 __acquire(local);
1836 drbd_free_ldev(device->ldev);
1837 device->ldev = NULL;
1838 __release(local);
1839
Lars Ellenberge334f552014-02-11 09:30:49 +01001840 clear_bit(GOING_DISKLESS, &device->flags);
1841 wake_up(&device->misc_wait);
1842}
1843
1844static void go_diskless(struct drbd_device *device)
1845{
1846 D_ASSERT(device, device->state.disk == D_FAILED);
1847 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1848 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1849 * the protected members anymore, though, so once put_ldev reaches zero
1850 * again, it will be safe to free them. */
1851
1852 /* Try to write changed bitmap pages, read errors may have just
1853 * set some bits outside the area covered by the activity log.
1854 *
1855 * If we have an IO error during the bitmap writeout,
1856 * we will want a full sync next time, just in case.
1857 * (Do we want a specific meta data flag for this?)
1858 *
1859 * If that does not make it to stable storage either,
1860 * we cannot do anything about that anymore.
1861 *
1862 * We still need to check if both bitmap and ldev are present, we may
1863 * end up here after a failed attach, before ldev was even assigned.
1864 */
1865 if (device->bitmap && device->ldev) {
1866 /* An interrupted resync or similar is allowed to recounts bits
1867 * while we detach.
1868 * Any modifications would not be expected anymore, though.
1869 */
1870 if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1871 "detach", BM_LOCKED_TEST_ALLOWED)) {
1872 if (test_bit(WAS_READ_ERROR, &device->flags)) {
1873 drbd_md_set_flag(device, MDF_FULL_SYNC);
1874 drbd_md_sync(device);
1875 }
1876 }
1877 }
1878
1879 drbd_force_state(device, NS(disk, D_DISKLESS));
1880}
1881
/*
 * MD_SYNC device work: warn that the md_sync timer expired and sync the
 * meta data from the worker.  Always returns 0.
 */
static int do_md_sync(struct drbd_device *device)
{
	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
	drbd_md_sync(device);

	return 0;
}
1888
Lars Ellenberg944410e2014-05-06 15:02:05 +02001889/* only called from drbd_worker thread, no locking */
1890void __update_timing_details(
1891 struct drbd_thread_timing_details *tdp,
1892 unsigned int *cb_nr,
1893 void *cb,
1894 const char *fn, const unsigned int line)
1895{
1896 unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
1897 struct drbd_thread_timing_details *td = tdp + i;
1898
1899 td->start_jif = jiffies;
1900 td->cb_addr = cb;
1901 td->caller_fn = fn;
1902 td->line = line;
1903 td->cb_nr = *cb_nr;
1904
1905 i = (i+1) % DRBD_THREAD_DETAILS_HIST;
1906 td = tdp + i;
1907 memset(td, 0, sizeof(*td));
1908
1909 ++(*cb_nr);
1910}
1911
Lars Ellenberge334f552014-02-11 09:30:49 +01001912static void do_device_work(struct drbd_device *device, const unsigned long todo)
1913{
Andreas Gruenbacherb47a06d2014-09-11 14:29:10 +02001914 if (test_bit(MD_SYNC, &todo))
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001915 do_md_sync(device);
Andreas Gruenbacherb47a06d2014-09-11 14:29:10 +02001916 if (test_bit(RS_DONE, &todo) ||
1917 test_bit(RS_PROGRESS, &todo))
1918 update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
1919 if (test_bit(GO_DISKLESS, &todo))
Lars Ellenberge334f552014-02-11 09:30:49 +01001920 go_diskless(device);
Andreas Gruenbacherb47a06d2014-09-11 14:29:10 +02001921 if (test_bit(DESTROY_DISK, &todo))
Lars Ellenberge334f552014-02-11 09:30:49 +01001922 drbd_ldev_destroy(device);
Andreas Gruenbacherb47a06d2014-09-11 14:29:10 +02001923 if (test_bit(RS_START, &todo))
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001924 do_start_resync(device);
Lars Ellenberge334f552014-02-11 09:30:49 +01001925}
1926
/* All device->flags bits that represent unqueued per-device work. */
#define DRBD_DEVICE_WORK_MASK		\
	((1UL << GO_DISKLESS) |		\
	 (1UL << DESTROY_DISK) |	\
	 (1UL << MD_SYNC) |		\
	 (1UL << RS_START) |		\
	 (1UL << RS_PROGRESS) |		\
	 (1UL << RS_DONE))
1935
1936static unsigned long get_work_bits(unsigned long *flags)
1937{
1938 unsigned long old, new;
1939 do {
1940 old = *flags;
1941 new = old & ~DRBD_DEVICE_WORK_MASK;
1942 } while (cmpxchg(flags, old, new) != old);
1943 return old & DRBD_DEVICE_WORK_MASK;
1944}
1945
1946static void do_unqueued_work(struct drbd_connection *connection)
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001947{
1948 struct drbd_peer_device *peer_device;
1949 int vnr;
1950
1951 rcu_read_lock();
1952 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1953 struct drbd_device *device = peer_device->device;
Lars Ellenberge334f552014-02-11 09:30:49 +01001954 unsigned long todo = get_work_bits(&device->flags);
1955 if (!todo)
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001956 continue;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001957
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001958 kref_get(&device->kref);
1959 rcu_read_unlock();
Lars Ellenberge334f552014-02-11 09:30:49 +01001960 do_device_work(device, todo);
Lars Ellenbergc7a58db2013-12-20 11:39:48 +01001961 kref_put(&device->kref, drbd_destroy_device);
1962 rcu_read_lock();
1963 }
1964 rcu_read_unlock();
1965}
1966
Rashika Kheriaa186e472013-12-19 15:06:10 +05301967static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001968{
1969 spin_lock_irq(&queue->q_lock);
Lars Ellenberg15e26f62014-04-28 11:43:21 +02001970 list_splice_tail_init(&queue->q, work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001971 spin_unlock_irq(&queue->q_lock);
1972 return !list_empty(work_list);
1973}
1974
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001975static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001976{
1977 DEFINE_WAIT(wait);
1978 struct net_conf *nc;
1979 int uncork, cork;
1980
Lars Ellenbergabde9cc2014-09-11 14:29:11 +02001981 dequeue_work_batch(&connection->sender_work, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001982 if (!list_empty(work_list))
1983 return;
1984
1985 /* Still nothing to do?
1986 * Maybe we still need to close the current epoch,
1987 * even if no new requests are queued yet.
1988 *
1989 * Also, poke TCP, just in case.
1990 * Then wait for new work (or signal). */
1991 rcu_read_lock();
1992 nc = rcu_dereference(connection->net_conf);
1993 uncork = nc ? nc->tcp_cork : 0;
1994 rcu_read_unlock();
1995 if (uncork) {
1996 mutex_lock(&connection->data.mutex);
1997 if (connection->data.socket)
1998 drbd_tcp_uncork(connection->data.socket);
1999 mutex_unlock(&connection->data.mutex);
2000 }
2001
2002 for (;;) {
2003 int send_barrier;
2004 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002005 spin_lock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002006 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
Lars Ellenbergbc317a92012-08-22 11:47:14 +02002007 if (!list_empty(&connection->sender_work.q))
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01002008 list_splice_tail_init(&connection->sender_work.q, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002009 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
2010 if (!list_empty(work_list) || signal_pending(current)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002011 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002012 break;
2013 }
Lars Ellenbergf9c78122014-04-28 18:43:29 +02002014
2015 /* We found nothing new to do, no to-be-communicated request,
2016 * no other work item. We may still need to close the last
2017 * epoch. Next incoming request epoch will be connection ->
2018 * current transfer log epoch number. If that is different
2019 * from the epoch of the last request we communicated, it is
2020 * safe to send the epoch separating barrier now.
2021 */
2022 send_barrier =
2023 atomic_read(&connection->current_tle_nr) !=
2024 connection->send.current_epoch_nr;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002025 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergf9c78122014-04-28 18:43:29 +02002026
2027 if (send_barrier)
2028 maybe_send_barrier(connection,
2029 connection->send.current_epoch_nr + 1);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002030
Lars Ellenberge334f552014-02-11 09:30:49 +01002031 if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002032 break;
2033
Lars Ellenberga80ca1a2013-12-27 17:17:25 +01002034 /* drbd_send() may have called flush_signals() */
2035 if (get_t_state(&connection->worker) != RUNNING)
2036 break;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002037
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01002038 schedule();
2039 /* may be woken up for other things but new work, too,
2040 * e.g. if the current epoch got closed.
2041 * In which case we send the barrier above. */
2042 }
2043 finish_wait(&connection->sender_work.q_wait, &wait);
2044
2045 /* someone may have changed the config while we have been waiting above. */
2046 rcu_read_lock();
2047 nc = rcu_dereference(connection->net_conf);
2048 cork = nc ? nc->tcp_cork : 0;
2049 rcu_read_unlock();
2050 mutex_lock(&connection->data.mutex);
2051 if (connection->data.socket) {
2052 if (cork)
2053 drbd_tcp_cork(connection->data.socket);
2054 else if (!uncork)
2055 drbd_tcp_uncork(connection->data.socket);
2056 }
2057 mutex_unlock(&connection->data.mutex);
2058}
2059
Philipp Reisnerb411b362009-09-25 16:07:19 -07002060int drbd_worker(struct drbd_thread *thi)
2061{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002062 struct drbd_connection *connection = thi->connection;
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002063 struct drbd_work *w = NULL;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02002064 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002065 LIST_HEAD(work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002066 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002067
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01002068 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01002069 drbd_thread_current_set_cpu(thi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002070
Lars Ellenberg944410e2014-05-06 15:02:05 +02002071 if (list_empty(&work_list)) {
2072 update_worker_timing_details(connection, wait_for_work);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002073 wait_for_work(connection, &work_list);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002074 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002075
Lars Ellenberg944410e2014-05-06 15:02:05 +02002076 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2077 update_worker_timing_details(connection, do_unqueued_work);
Lars Ellenberge334f552014-02-11 09:30:49 +01002078 do_unqueued_work(connection);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002079 }
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01002080
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002081 if (signal_pending(current)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002082 flush_signals(current);
Philipp Reisner19393e12011-02-09 10:09:07 +01002083 if (get_t_state(thi) == RUNNING) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02002084 drbd_warn(connection, "Worker got an unexpected signal\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002085 continue;
Philipp Reisner19393e12011-02-09 10:09:07 +01002086 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002087 break;
2088 }
2089
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01002090 if (get_t_state(thi) != RUNNING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002091 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002092
Lars Ellenberg729e8b82014-09-11 14:29:12 +02002093 if (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002094 w = list_first_entry(&work_list, struct drbd_work, list);
2095 list_del_init(&w->list);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002096 update_worker_timing_details(connection, w->cb);
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002097 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002098 continue;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002099 if (connection->cstate >= C_WF_REPORT_PARAMS)
2100 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002101 }
2102 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002103
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02002104 do {
Lars Ellenberg944410e2014-05-06 15:02:05 +02002105 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2106 update_worker_timing_details(connection, do_unqueued_work);
Lars Ellenberge334f552014-02-11 09:30:49 +01002107 do_unqueued_work(connection);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002108 }
Lars Ellenberg729e8b82014-09-11 14:29:12 +02002109 if (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002110 w = list_first_entry(&work_list, struct drbd_work, list);
2111 list_del_init(&w->list);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002112 update_worker_timing_details(connection, w->cb);
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02002113 w->cb(w, 1);
Lars Ellenberg729e8b82014-09-11 14:29:12 +02002114 } else
2115 dequeue_work_batch(&connection->sender_work, &work_list);
Lars Ellenberge334f552014-02-11 09:30:49 +01002116 } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002117
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002118 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02002119 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2120 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002121 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002122 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002123 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002124 drbd_device_cleanup(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02002125 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002126 rcu_read_lock();
Philipp Reisner0e29d162011-02-18 14:23:11 +01002127 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02002128 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002129
2130 return 0;
2131}